diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/b391179f2d4f2cc7a404.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/b391179f2d4f2cc7a404.json deleted file mode 100644 index 47e6e269256b48ff6b5dbcea8048027a006c3c46..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/b391179f2d4f2cc7a404.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5632, "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "neuron": {"compiler_version": "2.17.194.0+d312836f", "input_specs": {"attention_mask": [2, 1024], "input_ids": [2, 1024], "labels": [2, 1024]}, "model_class": "PeftModelForCausalLM", "num_neuron_cores_per_node": 2, "pipeline_parallel_size": 1, "precision": "bfloat16", "tensor_parallel_size": 2, "training": true}, "num_attention_heads": 32, "num_hidden_layers": 22, "num_key_value_heads": 4, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/meta-llama/Llama-3.1-8B-Instruct/a4e0275af090ae00d0f4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/meta-llama/Llama-3.1-8B-Instruct/a4e0275af090ae00d0f4.json deleted file mode 100644 index fce0e6e6c6a8de11d54b06636ef7077f4dce6ee2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/meta-llama/Llama-3.1-8B-Instruct/a4e0275af090ae00d0f4.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": [128001, 128008, 128009], "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "neuron": {"compiler_version": "2.17.194.0+d312836f", "input_specs": {"attention_mask": [2, 1024], "input_ids": [2, 1024], "labels": [2, 1024]}, "model_class": "PeftModelForCausalLM", "num_neuron_cores_per_node": 32, "pipeline_parallel_size": 1, "precision": "bfloat16", "tensor_parallel_size": 8, "training": true}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 8.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.28/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8a31499781b39a1babbc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.28/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8a31499781b39a1babbc.json deleted file mode 100644 index 3c2a61ee3279e1384377aeea622ed51437bcfb81..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.28/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8a31499781b39a1babbc.json +++ /dev/null @@ -1 +0,0 @@ -{"_attn_implementation_autoset": true, "architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5632, "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "neuron": {"compiler_version": "2.17.194.0+d312836f", "input_specs": {"attention_mask": [2, 1024], "input_ids": [2, 1024], "labels": [2, 1024]}, "model_class": "PeftModelForCausalLM", "num_neuron_cores_per_node": 2, "pipeline_parallel_size": 1, "precision": "bfloat16", "tensor_parallel_size": 2, "training": true}, "num_attention_heads": 32, "num_hidden_layers": 22, "num_key_value_heads": 4, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/7353b00efd1c2cf456a5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/7353b00efd1c2cf456a5.json deleted file mode 100644 index 6c36b2bd7291c938320caed7436dce9fbcf2da35..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/7353b00efd1c2cf456a5.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "openai-community/gpt2", - "_task": "text-generation", - "activation_function": "gelu_new", - "architectures": [ - "GPT2LMHeadModel" - ], - "attn_pdrop": 0.1, - "embd_pdrop": 0.1, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "model_type": "gpt2", - "n_ctx": 1024, - "n_embd": 768, - "n_head": 12, - "n_inner": null, - "n_layer": 12, - "n_positions": 1024, - "neuron": { - "auto_cast_type": "fp16", - "batch_size": 16, - "checkpoint_id": "openai-community/gpt2", - "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 1024, - "task": "text-generation" - }, - "reorder_and_upcast_attn": false, - "resid_pdrop": 0.1, - "scale_attn_by_inverse_layer_idx": false, - "scale_attn_weights": true, - "summary_activation": null, - "summary_first_dropout": 0.1, - "summary_proj_to_labels": true, - "summary_type": "cls_index", - "summary_use_proj": true, - "task_specific_params": { - "text-generation": { - "do_sample": true, - "max_length": 50 - } - }, - "use_cache": true, - "vocab_size": 50257 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/dfdea472b85b5e1c1bc0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/dfdea472b85b5e1c1bc0.json deleted file mode 100644 index ae1ba6a2e99d1c62b570e443712d0961acdda11e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/dfdea472b85b5e1c1bc0.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "openai-community/gpt2", - "_task": "text-generation", - "activation_function": "gelu_new", - "architectures": [ - "GPT2LMHeadModel" - ], - "attn_pdrop": 0.1, - "embd_pdrop": 0.1, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "model_type": "gpt2", - "n_ctx": 1024, - "n_embd": 768, - "n_head": 12, - "n_inner": null, - "n_layer": 12, - "n_positions": 1024, - "neuron": { - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "openai-community/gpt2", - "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 1024, - "task": "text-generation" - }, - "reorder_and_upcast_attn": false, - "resid_pdrop": 0.1, - "scale_attn_by_inverse_layer_idx": false, - "scale_attn_weights": true, - "summary_activation": null, - "summary_first_dropout": 0.1, - "summary_proj_to_labels": true, - "summary_type": "cls_index", - "summary_use_proj": true, - "task_specific_params": { - "text-generation": { - "do_sample": true, - "max_length": 50 - } - }, - "use_cache": true, - "vocab_size": 50257 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/ecd6582c85ac47fe17d8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/ecd6582c85ac47fe17d8.json deleted file mode 100644 index 54c743d5b387de50d038a42d1a31e3f774514951..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/ecd6582c85ac47fe17d8.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "openai-community/gpt2", - "_task": "text-generation", - "activation_function": "gelu_new", - "architectures": [ - "GPT2LMHeadModel" - ], - "attn_pdrop": 0.1, - "embd_pdrop": 0.1, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "model_type": "gpt2", - "n_ctx": 1024, - "n_embd": 768, - "n_head": 12, - "n_inner": null, - "n_layer": 12, - "n_positions": 1024, - "neuron": { - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "openai-community/gpt2", - "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 1024, - "task": "text-generation" - }, - "reorder_and_upcast_attn": false, - "resid_pdrop": 0.1, - "scale_attn_by_inverse_layer_idx": false, - "scale_attn_weights": true, - "summary_activation": null, - "summary_first_dropout": 0.1, - "summary_proj_to_labels": true, - "summary_type": "cls_index", - "summary_use_proj": true, - "task_specific_params": { - "text-generation": { - "do_sample": true, - "max_length": 50 - } - }, - "use_cache": true, - "vocab_size": 50257 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/23c029b3504f98db0dae.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/23c029b3504f98db0dae.json deleted file mode 100644 index ed4118ffd7d09072e646504adc0c1bbd8840e5b4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/23c029b3504f98db0dae.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/245ec3826de5d0d8c4f3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/245ec3826de5d0d8c4f3.json deleted file mode 100644 index e9a91da3f789e73007f8b466f3be2030dbe7fc04..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/245ec3826de5d0d8c4f3.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9adf62282ade09a4ee97.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9adf62282ade09a4ee97.json deleted file mode 100644 index 28a71a7ac2d943830129a0549ab7dce46ff918a1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9adf62282ade09a4ee97.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9e7df2d93403be65f917.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9e7df2d93403be65f917.json deleted file mode 100644 index af04ade218d1090a87c90d59ad41c2d47fb4a75e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9e7df2d93403be65f917.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/e8aeb35344c4c108f038.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/e8aeb35344c4c108f038.json deleted file mode 100644 index 61e12fc5b571404a540fce89ccd444ceae56ff94..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/e8aeb35344c4c108f038.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/21fede0703904ef6a5cc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/21fede0703904ef6a5cc.json deleted file mode 100644 index 94f8ff72839a11634139fbe025fdc3803ee75a77..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/21fede0703904ef6a5cc.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/45d32f74efe639efa140.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/45d32f74efe639efa140.json deleted file mode 100644 index efe2d09fa944fdb83bff30ab35f27a3a138a4a09..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/45d32f74efe639efa140.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/5f89c0b1616392b66470.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/5f89c0b1616392b66470.json deleted file mode 100644 index 9a3fb0b9c26f76f9def5cbc827c481a923a337a4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/5f89c0b1616392b66470.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/9c4fc51180f9ddea7ebd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/9c4fc51180f9ddea7ebd.json deleted file mode 100644 index 63f9f230598f876a96caa4450dd69a8e9ebe90af..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/9c4fc51180f9ddea7ebd.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/b6f91fdddb08f318e31f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/b6f91fdddb08f318e31f.json deleted file mode 100644 index 715e13073d0bb32d9784b59b9c64509a1129a179..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/b6f91fdddb08f318e31f.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/00d8fe05e3026bef5097.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/00d8fe05e3026bef5097.json deleted file mode 100644 index c7e94d95ef4604bbaf99800241cb7e374457e26d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/00d8fe05e3026bef5097.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1e02b59600f20e2b4809.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1e02b59600f20e2b4809.json deleted file mode 100644 index 2d7613fb2f82ab8143f7135db374596f16f32e53..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1e02b59600f20e2b4809.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5fab5c4bfd6cc5c466c0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5fab5c4bfd6cc5c466c0.json deleted file mode 100644 index 9513042a9938aefab27cfce04fee93a5166e22cf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5fab5c4bfd6cc5c466c0.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b4a848e7d155dba24978.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b4a848e7d155dba24978.json deleted file mode 100644 index 16e67939522718c09b63055bfb586e2af943bd5a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b4a848e7d155dba24978.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b8d797840e56152f0045.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b8d797840e56152f0045.json deleted file mode 100644 index 719dbf7f9e3095b674c84f265d7a0cb644c24ddd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b8d797840e56152f0045.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/d23d95cf8617c46c6790.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/d23d95cf8617c46c6790.json deleted file mode 100644 index ac746b2052a4107b9d5b182150240d97e6bc08ca..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/d23d95cf8617c46c6790.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e3aa9bba36ac779dc68b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e3aa9bba36ac779dc68b.json deleted file mode 100644 index 629ed88cb04e9b3377b4039c2f4222bdf13965f4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e3aa9bba36ac779dc68b.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-13b-hf/15497d4569cb4aabb3a9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-13b-hf/15497d4569cb4aabb3a9.json deleted file mode 100644 index 0ab5c12ec385a7cbb8782dda1fefa4445bc595bb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-13b-hf/15497d4569cb4aabb3a9.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 2048, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-7b-hf/d75e02defdb0d74b4773.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-7b-hf/d75e02defdb0d74b4773.json deleted file mode 100644 index 8d66ef0ffda3a66ba7c484f7fa31fae8c977f4b8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-7b-hf/d75e02defdb0d74b4773.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 2048, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/0893aa250f27c3bca5d9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/0893aa250f27c3bca5d9.json deleted file mode 100644 index 143adb7c721ace65052cdd5dbc4015529fcbc86e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/0893aa250f27c3bca5d9.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 24, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/26f4ee07b3f4c0422285.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/26f4ee07b3f4c0422285.json deleted file mode 100644 index c34105f689a30e95c9beb00a707a7b8be09fa77c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/26f4ee07b3f4c0422285.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 24, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-1B/178748f8e86d0180fe29.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-1B/178748f8e86d0180fe29.json deleted file mode 100644 index 44f8d3aec6aa24a47d3679f8b671eaff16bd034b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-1B/178748f8e86d0180fe29.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-3B/a6b80f5e9df4129d8a64.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-3B/a6b80f5e9df4129d8a64.json deleted file mode 100644 index 8d3dc6734dfac0ff9ea5e3edfb6dcf6ea6d1f2c2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-3B/a6b80f5e9df4129d8a64.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3-8B/3f50b3c04cf531d956ff.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3-8B/3f50b3c04cf531d956ff.json deleted file mode 100644 index be5499dd30f29b97181ab52f21adfac068468e36..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3-8B/3f50b3c04cf531d956ff.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/08baac4331a38cf9b5c6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/08baac4331a38cf9b5c6.json deleted file mode 100644 index 13680598e9cceb39cf0091738956f15616985d6a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/08baac4331a38cf9b5c6.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/2ce651c4b0160df7b1a7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/2ce651c4b0160df7b1a7.json deleted file mode 100644 index ca010f05eee29d68636e4a2a0147d2ba7f6ed109..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/2ce651c4b0160df7b1a7.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/53917ac5a736440f6651.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/53917ac5a736440f6651.json deleted file mode 100644 index ad591eb2a97bdc88260a6e46f9bbe8e3e7c647b1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/53917ac5a736440f6651.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/63ae9940e985694a6de1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/63ae9940e985694a6de1.json deleted file mode 100644 index d6665d3c7a622adb3d1059c0db67bb336aaf35fb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/63ae9940e985694a6de1.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/6fd804b37216317e4f8e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/6fd804b37216317e4f8e.json deleted file mode 100644 index 522fa5a7fdbdf9daf6aa240185224f81ef1ee451..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/6fd804b37216317e4f8e.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/8929a74a3f085f34acec.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/8929a74a3f085f34acec.json deleted file mode 100644 index 617f5c5dff0bc4a59a64a3cd287173381ac0923c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/8929a74a3f085f34acec.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/9580f944931f95eff7e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/9580f944931f95eff7e8.json deleted file mode 100644 index 66a1447611049d1ea4b7c3cc25404d17f3e3ac1d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/9580f944931f95eff7e8.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/6845a4ab255499aced61.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/6845a4ab255499aced61.json deleted file mode 100644 index e74e2f9d392cf7f6ff7357b536b89569507b0ca0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/6845a4ab255499aced61.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/7ad601d64b726cbb4ba6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/7ad601d64b726cbb4ba6.json deleted file mode 100644 index 17fe450f11de48a8533152c682f4463acb8aaedc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/7ad601d64b726cbb4ba6.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/455574355aa6f6df8272.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/455574355aa6f6df8272.json deleted file mode 100644 index cf0b9b19fd56be5d8f249ba809ba60ad1861002a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/455574355aa6f6df8272.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "HuggingFaceH4/zephyr-7b-beta", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta", - "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/9fcdfdf6fc4e018f496f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/9fcdfdf6fc4e018f496f.json deleted file mode 100644 index f6795939cd18404d85aa584a22a05818fe5de0a8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/9fcdfdf6fc4e018f496f.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "HuggingFaceH4/zephyr-7b-beta", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta", - "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/a3def2b08381a583fdae.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/a3def2b08381a583fdae.json deleted file mode 100644 index 4464e1c270b4974ff80433aef67fe2dcf4fdd36d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/a3def2b08381a583fdae.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "HuggingFaceH4/zephyr-7b-beta", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta", - "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c37761e9cb8e4f9c854e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c37761e9cb8e4f9c854e.json deleted file mode 100644 index 6c9732bf25d2480fbbfc26c037403da0dee0ecca..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c37761e9cb8e4f9c854e.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "HuggingFaceH4/zephyr-7b-beta", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta", - "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c7b792c009f3a8f9bd58.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c7b792c009f3a8f9bd58.json deleted file mode 100644 index 2a57e132cb880fffef33fb2e8a333ec4f71c554d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c7b792c009f3a8f9bd58.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "HuggingFaceH4/zephyr-7b-beta", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta", - "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/fb824464b88d20b99ad4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/fb824464b88d20b99ad4.json deleted file mode 100644 index 59c5fa9968de8eec4cbaad7d34e440deb6faeeef..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/fb824464b88d20b99ad4.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "HuggingFaceH4/zephyr-7b-beta", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta", - "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/13c11b9170e3b3cb8544.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/13c11b9170e3b3cb8544.json deleted file mode 100644 index f904c7b402097f53d4c6f82558513c2a4fd40613..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/13c11b9170e3b3cb8544.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Intel/neural-chat-7b-v3-3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "Intel/neural-chat-7b-v3-3", - "checkpoint_revision": "7506dfc5fb325a8a8e0c4f9a6a001671833e5b8e", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/c0fe68f25c283c1998b1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/c0fe68f25c283c1998b1.json deleted file mode 100644 index ecc87ef038042e4ee1e633dd6c2da5ad1af15786..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/c0fe68f25c283c1998b1.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Intel/neural-chat-7b-v3-3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Intel/neural-chat-7b-v3-3", - "checkpoint_revision": "7506dfc5fb325a8a8e0c4f9a6a001671833e5b8e", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/be253ef73d692f0acdde.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/be253ef73d692f0acdde.json deleted file mode 100644 index d5ae54af10f3453e5187002cb714a3ee3bf06af9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/be253ef73d692f0acdde.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.1", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.1", - "checkpoint_revision": "2dcff66eac0c01dc50e4c41eea959968232187fe", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ab22dc02d25b1a888451.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ab22dc02d25b1a888451.json deleted file mode 100644 index 334e4f782852b050c17c58634a7270d4773876af..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ab22dc02d25b1a888451.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.2", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", - "checkpoint_revision": "3ad372fc79158a2148299e3318516c786aeded6c", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/06ed075b59d7dee23809.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/06ed075b59d7dee23809.json deleted file mode 100644 index 2d39d32a5590c121a0e1f1057717c56ad2314049..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/06ed075b59d7dee23809.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3", - "checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32768 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/319da426e18041fae32f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/319da426e18041fae32f.json deleted file mode 100644 index 0173eeb90d4948ec7600e4ba77a06330ac3a14a0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/319da426e18041fae32f.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3", - "checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32768 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/476a70cd474874c042f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/476a70cd474874c042f1.json deleted file mode 100644 index dbdfd904794014a9b7c331728be85e071410338d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/476a70cd474874c042f1.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3", - "checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32768 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/63ff45b6a619b75535f7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/63ff45b6a619b75535f7.json deleted file mode 100644 index 3dfc0a4eeea11ca3c1b3405ab4e447e31f459fdf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/63ff45b6a619b75535f7.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3", - "checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32768 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/77037ea362294b200121.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/77037ea362294b200121.json deleted file mode 100644 index 7ae961bcdf897557bde9b0f911185431a26cc8c9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/77037ea362294b200121.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3", - "checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32768 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/cdf9575ea00ac41d8b58.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/cdf9575ea00ac41d8b58.json deleted file mode 100644 index 383b125272a24e29015564363edd0a1d1b1b021e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/cdf9575ea00ac41d8b58.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3", - "checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 4, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32768 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/d5087b3021c8d27390b4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/d5087b3021c8d27390b4.json deleted file mode 100644 index 8c656c6dda7f9c2c6bfac0e45b5979da9b15e905..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/d5087b3021c8d27390b4.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3", - "checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32768 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/e6b41b2f3070ea47dc29.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/e6b41b2f3070ea47dc29.json deleted file mode 100644 index 438cfced3231d10b572c151f3b3ee207994b51fb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/e6b41b2f3070ea47dc29.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3", - "checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32768 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/f592e783f4035c10d44c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/f592e783f4035c10d44c.json deleted file mode 100644 index be96fbaa130f8da7153dfd86416f7e98cd156734..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/f592e783f4035c10d44c.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3", - "checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32768 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-Small-Instruct-2409/d7cc42cb64fd48aaa6e4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-Small-Instruct-2409/d7cc42cb64fd48aaa6e4.json deleted file mode 100644 index 64dd6016f9adb2e060ec351508b54a5aecc16d22..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-Small-Instruct-2409/d7cc42cb64fd48aaa6e4.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mistral-Small-Instruct-2409", - "_task": "text-generation", - "architectures": [ - "MistralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 6144, - "initializer_range": 0.02, - "intermediate_size": 16384, - "max_position_embeddings": 32768, - "model_type": "mistral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "mistralai/Mistral-Small-Instruct-2409", - "checkpoint_revision": "8012044390bdc1c6d8ab162f5416220f43bf517b", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 12, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 48, - "num_hidden_layers": 56, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32768 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/6cb938012d9212c10b69.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/6cb938012d9212c10b69.json deleted file mode 100644 index f75abec3c83b4d99557d1ad8172e480391e0a4d2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/6cb938012d9212c10b69.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mixtral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "checkpoint_revision": "41bd4c9e7e4fb318ca40e721131d4933966c2cc1", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 24, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "router_aux_loss_coef": 0.02, - "router_jitter_noise": 0.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/dfe8221ddaab628c3c71.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/dfe8221ddaab628c3c71.json deleted file mode 100644 index 62f9c823687a273cfc1838217dfb8b185ebe4fb9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/dfe8221ddaab628c3c71.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mixtral", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "checkpoint_revision": "41bd4c9e7e4fb318ca40e721131d4933966c2cc1", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 24, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "router_aux_loss_coef": 0.02, - "router_jitter_noise": 0.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/Phi-3-mini-4k-instruct/06e7ec6929127dd02d5c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/Phi-3-mini-4k-instruct/06e7ec6929127dd02d5c.json deleted file mode 100644 index 5ad20d2437c94c276a72396c17fd19e643d1c15a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/Phi-3-mini-4k-instruct/06e7ec6929127dd02d5c.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/Phi-3-mini-4k-instruct/36dc45f342a64a183705.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/Phi-3-mini-4k-instruct/36dc45f342a64a183705.json deleted file mode 100644 index 6e06c8c3977312dbf75f1c753a916568d496ddc9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/Phi-3-mini-4k-instruct/36dc45f342a64a183705.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/phi-4/7e9bce30fa4577df5c63.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/phi-4/7e9bce30fa4577df5c63.json deleted file mode 100644 index 2a0e333cff6d2be5bc73f1c38c8f93af0130976b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/phi-4/7e9bce30fa4577df5c63.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 10, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/phi-4/c1de9e8918c3d6446bf1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/phi-4/c1de9e8918c3d6446bf1.json deleted file mode 100644 index 3f77faae4ea6a5e7eb909c294a0194e66ff68b76..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/microsoft/phi-4/c1de9e8918c3d6446bf1.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 10, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-0.5B/62079d5669d389f5ddea.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-0.5B/62079d5669d389f5ddea.json deleted file mode 100644 index 3463e301f2b6e07ac260dc4a5c2eaadc42ee9390..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-0.5B/62079d5669d389f5ddea.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-0.5B/d6fa3a9fc1098a5758b7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-0.5B/d6fa3a9fc1098a5758b7.json deleted file mode 100644 index 79b69455b168fa383920418eedf72b7e48d36a0c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-0.5B/d6fa3a9fc1098a5758b7.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-1.5B/53f79786acd24a84f1e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-1.5B/53f79786acd24a84f1e8.json deleted file mode 100644 index 80a459740e5b11299b0bff0f459833247bfff013..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-1.5B/53f79786acd24a84f1e8.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-1.5B/771f2227e5615ad64379.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-1.5B/771f2227e5615ad64379.json deleted file mode 100644 index ebc25ea540f97e2369295786e2aa172e44f2bb3b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-1.5B/771f2227e5615ad64379.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-14B/0dd1bb4df1da89aabd5d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-14B/0dd1bb4df1da89aabd5d.json deleted file mode 100644 index 4f140ca72850e3575f9a3107b673fb4d95cd534d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-14B/0dd1bb4df1da89aabd5d.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-14B/ac4bf66e60387adae860.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-14B/ac4bf66e60387adae860.json deleted file mode 100644 index d4716c4069f7754be8209d6032a48ea9b94a1c6c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-14B/ac4bf66e60387adae860.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-32B-Instruct/0d5947d8d8540ed9a751.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-32B-Instruct/0d5947d8d8540ed9a751.json deleted file mode 100644 index 108cbca9d5f68783b0a762681aa6e87653fb633d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-32B-Instruct/0d5947d8d8540ed9a751.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-32B-Instruct/d93a10bb6762b29f3bfc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-32B-Instruct/d93a10bb6762b29f3bfc.json deleted file mode 100644 index 22dfa493b364cec57275c8acaad75db7648a4e3a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-32B-Instruct/d93a10bb6762b29f3bfc.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-72B-Instruct/024062facd4445ed9c74.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-72B-Instruct/024062facd4445ed9c74.json deleted file mode 100644 index 2c932efc489810a00bbe4de9692bd5489a630d72..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-72B-Instruct/024062facd4445ed9c74.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 24, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/4bc7a2cb4c31fec10cb4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/4bc7a2cb4c31fec10cb4.json deleted file mode 100644 index e53a4021577716647957d65e1189707b33a461e3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/4bc7a2cb4c31fec10cb4.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/7f3993af4f29c6eb0c7f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/7f3993af4f29c6eb0c7f.json deleted file mode 100644 index 707a9da22942b7fd1592fa66f119f5366e9558b9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/7f3993af4f29c6eb0c7f.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/b99bab88b5323df2c79e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/b99bab88b5323df2c79e.json deleted file mode 100644 index 897c21840c6d5229b850dbcd61def8ea960c4aca..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/b99bab88b5323df2c79e.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/c33de8a827c15bf5d69f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/c33de8a827c15bf5d69f.json deleted file mode 100644 index dcd4b4c28547f2fa8febe0913a78756dde641c32..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/c33de8a827c15bf5d69f.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/f2752f6ddf81ee5138ff.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/f2752f6ddf81ee5138ff.json deleted file mode 100644 index 7a8e0d0045f5e8a2e589398eca69652b312b2ca0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/Qwen/Qwen2.5-7B-Instruct/f2752f6ddf81ee5138ff.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/a8e18840d03c8316979d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/a8e18840d03c8316979d.json deleted file mode 100644 index 10ea44003e4dbcc64c66ff6921b627981c844ad6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/a8e18840d03c8316979d.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/eb50acc9085524a5c2a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/eb50acc9085524a5c2a8.json deleted file mode 100644 index 57eed414922cc2f0fa0d857142a781b0b8859921..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/eb50acc9085524a5c2a8.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/1d5fc665b502a0e7cf82.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/1d5fc665b502a0e7cf82.json deleted file mode 100644 index 658082cdc39f41086d9a665f8c9e13973892e47f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/1d5fc665b502a0e7cf82.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/fece10c42b62c97c6dc2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/fece10c42b62c97c6dc2.json deleted file mode 100644 index b0f52b9cc59cb4707911e3e8a841e468006a8a1c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/fece10c42b62c97c6dc2.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/6f24e75b3ee633c1f237.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/6f24e75b3ee633c1f237.json deleted file mode 100644 index ab51fc228030c3bc1c453adc76728c26b52d4c8c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/6f24e75b3ee633c1f237.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/8238095a820b8ea16124.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/8238095a820b8ea16124.json deleted file mode 100644 index 6aaa9ea58b70c1d0677c647c6206729bf5b6681f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/8238095a820b8ea16124.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/5fa22e4777945bcde3cf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/5fa22e4777945bcde3cf.json deleted file mode 100644 index cc741327e944f53db00dcca740a81a4fbb961c75..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/5fa22e4777945bcde3cf.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6cde1bb9ae98459a81df.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6cde1bb9ae98459a81df.json deleted file mode 100644 index 3ab51c4c4fccc4c79f370c82e14caaf3dac188d1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6cde1bb9ae98459a81df.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/8c666457e2fd65e7a485.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/8c666457e2fd65e7a485.json deleted file mode 100644 index e150f9e665636b08fdac835a0aa894cac55f20c7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/8c666457e2fd65e7a485.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 2, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/e178ba6d57c6db7dfa0c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/e178ba6d57c6db7dfa0c.json deleted file mode 100644 index bc04394d3aa8942a1c54fda6e0f81c2174140e30..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/e178ba6d57c6db7dfa0c.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/e26667b714e642d0771d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/e26667b714e642d0771d.json deleted file mode 100644 index 510143f4f487c25aada7b6238548fab957e394cb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/e26667b714e642d0771d.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "num_cores": 8, - "sequence_length": 4096, - "task": "text-generation" - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/228f8f4e6df763ef96f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/228f8f4e6df763ef96f1.json deleted file mode 100644 index 0050f94e5219f587aaeff34e5d4544b7d92a2f75..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/228f8f4e6df763ef96f1.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/2307eff613723fbcf298.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/2307eff613723fbcf298.json deleted file mode 100644 index 466379d096273a58d1d95ac8a66ecfd8253c6591..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/2307eff613723fbcf298.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/389793d0e488a1143c2f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/389793d0e488a1143c2f.json deleted file mode 100644 index c715571243c5c57c3d1883b99fbba8ccd96554aa..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/389793d0e488a1143c2f.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/3f184457cb92c4d568a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/3f184457cb92c4d568a6.json deleted file mode 100644 index 30a312caffaa0b4e92d9f92d2b4dbd884ca599ac..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/3f184457cb92c4d568a6.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/466e5afc7eddf6225299.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/466e5afc7eddf6225299.json deleted file mode 100644 index d69f845d361234695d3a0d21aa9b6709c4fbf0e4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/466e5afc7eddf6225299.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/957fcb6a4a7274239827.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/957fcb6a4a7274239827.json deleted file mode 100644 index 46d4a3c194257f1ac05c32e9a40dcbb80da30202..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/957fcb6a4a7274239827.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/b496e32f7d9c89e5368a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/b496e32f7d9c89e5368a.json deleted file mode 100644 index 0e2894d37bf6e8503df39b2b2c6f265861f8ef05..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/b496e32f7d9c89e5368a.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/c411a7ded2b38cfab7d1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/c411a7ded2b38cfab7d1.json deleted file mode 100644 index 116ad650a5e6f4c4e62fd70429d4d7bc7bd6b121..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/c411a7ded2b38cfab7d1.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/f18fff8ffa90866c5a94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/f18fff8ffa90866c5a94.json deleted file mode 100644 index edfb141dd86ab926a57c25806de91e9e4a467eec..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/f18fff8ffa90866c5a94.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/f6052478503ed4e74738.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/f6052478503ed4e74738.json deleted file mode 100644 index 4fc866e7915fa68c3ef536870b273d0832d8500d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/f6052478503ed4e74738.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/03a3e2f5c0a2f8722655.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/03a3e2f5c0a2f8722655.json deleted file mode 100644 index ab01c791492067036412b5516751f6b31133b73f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/03a3e2f5c0a2f8722655.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/04cba62beb67e6eb4cb5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/04cba62beb67e6eb4cb5.json deleted file mode 100644 index 39eeb64473c616aff1434a32065c7e1410c51247..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/04cba62beb67e6eb4cb5.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/0ec7b3f0b0ee9d3cfec6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/0ec7b3f0b0ee9d3cfec6.json deleted file mode 100644 index fb2dca4c09d6a96cb3e5c0b1b447786369e0f900..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/0ec7b3f0b0ee9d3cfec6.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/0ee126aad67ae40704c2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/0ee126aad67ae40704c2.json deleted file mode 100644 index 183ba479f128b2d8165371ac60b2412a43277ebe..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/0ee126aad67ae40704c2.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/446627ec82be26bed2ae.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/446627ec82be26bed2ae.json deleted file mode 100644 index 07de385c3018c17222bf294382b59acf48757df2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/446627ec82be26bed2ae.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/9ff75fbfe6f8fc975f79.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/9ff75fbfe6f8fc975f79.json deleted file mode 100644 index 75d7b7d844604a28a5d168abf2354b2ab21d59c5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/9ff75fbfe6f8fc975f79.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/a56f83e94890d94d9d77.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/a56f83e94890d94d9d77.json deleted file mode 100644 index 4a4a5e940bf9a8d25d7b986add6d72d3dac260d0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/a56f83e94890d94d9d77.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/aa528f1ffc084b0484c7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/aa528f1ffc084b0484c7.json deleted file mode 100644 index 4a2bea820ff2a102df3017a9d9b9082a2c993c91..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/aa528f1ffc084b0484c7.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/b10bbb75006336c1cfe0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/b10bbb75006336c1cfe0.json deleted file mode 100644 index a436208930a3a3fce1ecfdc67b2d3a81425fe788..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/b10bbb75006336c1cfe0.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/db5996636cc43f938771.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/db5996636cc43f938771.json deleted file mode 100644 index 4956b9017147f08596164236180ec7a13395d353..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/db5996636cc43f938771.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/3483a0ddae299487c7b9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/3483a0ddae299487c7b9.json deleted file mode 100644 index 54a9a3586ac4213a387820c2b0b9de2205c6b163..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/3483a0ddae299487c7b9.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 2048, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/2480602be8567cd2ced2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/2480602be8567cd2ced2.json deleted file mode 100644 index bb0f01d032db68dc9cf67879bd9197f251bacbd6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/2480602be8567cd2ced2.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/6820dca3105407a25dc2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/6820dca3105407a25dc2.json deleted file mode 100644 index 7fdb3bc681d8864f35186d4ebdb475d98f7a0694..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/6820dca3105407a25dc2.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/a0e9e4e534fabf6c8a01.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/a0e9e4e534fabf6c8a01.json deleted file mode 100644 index d2855942c558a47c1aa8f21fbf05781a6594c290..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/a0e9e4e534fabf6c8a01.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/a42efb3e832df13303fe.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/a42efb3e832df13303fe.json deleted file mode 100644 index 89a65e71c9d4885a4a3b476fb6bd6654fc783442..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/a42efb3e832df13303fe.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b191455bededd30f0cb9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b191455bededd30f0cb9.json deleted file mode 100644 index 9622f2f2ed84545e96107f63e9c8357776726fb2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b191455bededd30f0cb9.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/cb1712d3dcfb9cee79a2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/cb1712d3dcfb9cee79a2.json deleted file mode 100644 index a7e3d2bc34876601878d874cdce16cc039679837..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/cb1712d3dcfb9cee79a2.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/f13b36aea2e803b3e564.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/f13b36aea2e803b3e564.json deleted file mode 100644 index cb73f339762466358dedeaa9a2708a784c6e84c9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/f13b36aea2e803b3e564.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/0e8b7ff3c2a48897072a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/0e8b7ff3c2a48897072a.json deleted file mode 100644 index 00f37b39070604c599e7f84b5d8a66d296a841c2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/0e8b7ff3c2a48897072a.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "ctx_batch_size": 1, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 131072, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 131072, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 131072, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 1, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json deleted file mode 100644 index 5cabd82efb84d5e0fcd591d8276b5615264f5c34..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "ctx_batch_size": 1, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 1, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json deleted file mode 100644 index 53b9a8e6d4bf97e8891cc092df98161955e8b483..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "ctx_batch_size": 2, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 2, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json deleted file mode 100644 index c0ecae3aca95f6d474125537a85d0719e083708f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "ctx_batch_size": 1, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 1, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-2-13b-hf/30aac7419b3c3f793ebc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-2-13b-hf/30aac7419b3c3f793ebc.json deleted file mode 100644 index a83cd9a179485e5726a5a702136355795c91d190..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-2-13b-hf/30aac7419b3c3f793ebc.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 2048, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-2-7b-hf/efc2d6d0255171ce715c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-2-7b-hf/efc2d6d0255171ce715c.json deleted file mode 100644 index d16013b2ce9d107841227dbce3c3df9937ca2491..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-2-7b-hf/efc2d6d0255171ce715c.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 2048, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/14db3db8bb825e053b0a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/14db3db8bb825e053b0a.json deleted file mode 100644 index 48cf3e7f7145a2c8abcf932555ae90f84203a6a9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/14db3db8bb825e053b0a.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/61bf450ac9dd64f6f3c5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/61bf450ac9dd64f6f3c5.json deleted file mode 100644 index 696cbcf0c1322d3795dca8324870be313169cabe..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/61bf450ac9dd64f6f3c5.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/700f3d6831b945b35649.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/700f3d6831b945b35649.json deleted file mode 100644 index 76af17222f6ee754e11b18743e7cb909b1742a7f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/700f3d6831b945b35649.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", - "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", - "ctx_batch_size": 8, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "is_continuous_batching": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 8, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/b095f4e1a8142588f557.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/b095f4e1a8142588f557.json deleted file mode 100644 index 8175d5a49083b57b3ab87d891b8e709ead55e146..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/b095f4e1a8142588f557.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", - "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.2-1B/d18f81730757e2d6d532.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.2-1B/d18f81730757e2d6d532.json deleted file mode 100644 index 0c27602883782de24b7309cefdf151e6bcc71564..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.2-1B/d18f81730757e2d6d532.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.2-3B/f7728d6b06549d8d7c1c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.2-3B/f7728d6b06549d8d7c1c.json deleted file mode 100644 index 828541f75eda4bdbb4b0bce7350d5c53ea1b5e26..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.2-3B/f7728d6b06549d8d7c1c.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3-8B/a3c2f06094a32b71dfa0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3-8B/a3c2f06094a32b71dfa0.json deleted file mode 100644 index ddcd43387665cb553961dd48ce5ef2aa4056c9af..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3-8B/a3c2f06094a32b71dfa0.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/0ad4c7d162a3a9bf8582.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/0ad4c7d162a3a9bf8582.json deleted file mode 100644 index 455685a8e4f4dd05eb4d8c08f7468809eaa2b8af..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/0ad4c7d162a3a9bf8582.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/20a78b677aab318eca07.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/20a78b677aab318eca07.json deleted file mode 100644 index e19ea0381e8adce28973c3f4ab100197109176d8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/20a78b677aab318eca07.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/32f56f12611337300d3c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/32f56f12611337300d3c.json deleted file mode 100644 index 869bfa2b840d9c64915f08857cced50f9cca1aa0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/32f56f12611337300d3c.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/4c45a685188be76510ca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/4c45a685188be76510ca.json deleted file mode 100644 index e311c98319579323a4ea30acceb767ef8db60f76..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/4c45a685188be76510ca.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "ctx_batch_size": 8, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "is_continuous_batching": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 8, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/5719e2431a03a3a11a76.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/5719e2431a03a3a11a76.json deleted file mode 100644 index f4856e628eb3974c3b4597ec51a6e1e4e90a6460..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/5719e2431a03a3a11a76.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "ctx_batch_size": 8, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "is_continuous_batching": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 8, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6b1b231560c1337fa26f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6b1b231560c1337fa26f.json deleted file mode 100644 index 3b1e62aabeaf527e8ea353db5c6e1876b5602e8c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6b1b231560c1337fa26f.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6ced15a046147a7195f4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6ced15a046147a7195f4.json deleted file mode 100644 index 660ea9db811180c81df88f2f9c0ca9e842c14f30..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6ced15a046147a7195f4.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "ctx_batch_size": 8, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "is_continuous_batching": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 8, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": true - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/9c4169b4fbb67b14a4a9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/9c4169b4fbb67b14a4a9.json deleted file mode 100644 index eee7205fd45965fdd07bb86712e85b1e000dd8c6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/9c4169b4fbb67b14a4a9.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/aa8db34e3bc5856cf8f4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/aa8db34e3bc5856cf8f4.json deleted file mode 100644 index 0271af73cf12d8a9899961f66c545040fbe84578..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/aa8db34e3bc5856cf8f4.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/cdcd648610c19bcc53eb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/cdcd648610c19bcc53eb.json deleted file mode 100644 index 002d6ab43fbe91866e831334a7a5b05a28c4b6d1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/cdcd648610c19bcc53eb.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": true, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "ctx_batch_size": 8, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "is_continuous_batching": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 8, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/d9dccd0a686a3878bfe2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/d9dccd0a686a3878bfe2.json deleted file mode 100644 index 119575231683f44a4c72bc39f88f46ce37c232f4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/d9dccd0a686a3878bfe2.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/171dec198f1b13338acb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/171dec198f1b13338acb.json deleted file mode 100644 index c362ed69ceaa489a7211cd325ae8a99126aee235..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/171dec198f1b13338acb.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/b0cf12ed63c6d8e22de0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/b0cf12ed63c6d8e22de0.json deleted file mode 100644 index 439dd55077b02a486479e91b858c47e53218b3be..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/b0cf12ed63c6d8e22de0.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/1a7c4c7a9665d0a48d41.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/1a7c4c7a9665d0a48d41.json deleted file mode 100644 index c9b41bdb904f44fda9ba680e97804deee17882e4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/1a7c4c7a9665d0a48d41.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "9b58d4a36161a1e49ecf0a69d20b2736fef8e438", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/2ef58f6c53deabf1ec2e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/2ef58f6c53deabf1ec2e.json deleted file mode 100644 index 949e286953097a2fbb420860f53975bcd7a5058c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/2ef58f6c53deabf1ec2e.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": false, - "ctx_batch_size": 4, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 4, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a1454e46779410eda936.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a1454e46779410eda936.json deleted file mode 100644 index f807836ae8723b931da8e10a17b65cfca582b920..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a1454e46779410eda936.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "ctx_batch_size": 4, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "is_continuous_batching": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 4, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/3bd58a8f29b6ca08f6a0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/3bd58a8f29b6ca08f6a0.json deleted file mode 100644 index 36153f65a47279641b6fb1654507533ec9b9375d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/3bd58a8f29b6ca08f6a0.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "ctx_batch_size": 2, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "is_continuous_batching": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 2, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/605773b6644d01e65e12.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/605773b6644d01e65e12.json deleted file mode 100644 index 6b540b56752e4dcc4d34d0526f14ac1bb679d1cd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/605773b6644d01e65e12.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "ctx_batch_size": 1, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "is_continuous_batching": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 1, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/78dc7e071d7a0e3aef78.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/78dc7e071d7a0e3aef78.json deleted file mode 100644 index d25e7d44888313ab88a38ed7b3ccca8e46afeccd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/78dc7e071d7a0e3aef78.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "ctx_batch_size": 1, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "is_continuous_batching": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 1, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/3ddf73792a2ae93f79fc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/3ddf73792a2ae93f79fc.json deleted file mode 100644 index c6700d881f38dafe56dc44c85e54785015df808c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/3ddf73792a2ae93f79fc.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/ccfd9d05ebbe51a3470d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/ccfd9d05ebbe51a3470d.json deleted file mode 100644 index 8a08074dbeac028de03c2be1fa92b7445c3e3bf1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/ccfd9d05ebbe51a3470d.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/phi-4/b06faaa8aab7f46020fd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/phi-4/b06faaa8aab7f46020fd.json deleted file mode 100644 index fd4e3f60dfcb38be35d9b8e5711a2c09d7159c9e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/phi-4/b06faaa8aab7f46020fd.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/phi-4/fb58f8120e41706af69c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/phi-4/fb58f8120e41706af69c.json deleted file mode 100644 index 95ba3e0656fcbae2fd8b850a8cdc47b3a2d1e674..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/phi3/microsoft/phi-4/fb58f8120e41706af69c.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/03132552ec6caf198af1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/03132552ec6caf198af1.json deleted file mode 100644 index 1ae6fd7c35570a238e7494fbc6de7c650979e283..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/03132552ec6caf198af1.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/8b24a96ce19abde8141f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/8b24a96ce19abde8141f.json deleted file mode 100644 index d012519767e892b7aff9ff933111da36400967e6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/8b24a96ce19abde8141f.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/0a866af4346130863cb1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/0a866af4346130863cb1.json deleted file mode 100644 index 432551a00ff2059c186e37a345c0ac1bb3b73562..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/0a866af4346130863cb1.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/e6282f226f50b6d1933e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/e6282f226f50b6d1933e.json deleted file mode 100644 index 07e6c4a3ba1c306f050145b8e1ed221fb88f7991..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/e6282f226f50b6d1933e.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-14B/acd214e59457e42c6475.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-14B/acd214e59457e42c6475.json deleted file mode 100644 index 0cda2a0afc7583d4eca95acd9ec49dff1b66e93f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-14B/acd214e59457e42c6475.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-14B/f760cec2e673152f3cad.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-14B/f760cec2e673152f3cad.json deleted file mode 100644 index 748ff84dd7f8ecbcd5736ad8882b23fd7a2c4318..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-14B/f760cec2e673152f3cad.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/0fc3d348676c188c81b1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/0fc3d348676c188c81b1.json deleted file mode 100644 index 56b196e85ad03cbcdc70afc5ad4f81177a5b6fae..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/0fc3d348676c188c81b1.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/ca787f7ba3d5db6311d8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/ca787f7ba3d5db6311d8.json deleted file mode 100644 index 99b668018d6212fcfcaa5fc72deb57c956d52f15..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/ca787f7ba3d5db6311d8.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-72B-Instruct/cd2b3cc98ab5410dfa0c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-72B-Instruct/cd2b3cc98ab5410dfa0c.json deleted file mode 100644 index 5e285a3323c35082cc26a8ff3837b7a19818d137..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-72B-Instruct/cd2b3cc98ab5410dfa0c.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/423f0fccf5eb46e0ddba.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/423f0fccf5eb46e0ddba.json deleted file mode 100644 index d50818f6617b21127a81957c0b056c07a94363fe..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/423f0fccf5eb46e0ddba.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/a672c82a45b96fa4ba05.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/a672c82a45b96fa4ba05.json deleted file mode 100644 index 762bc39db2f01581917bc1158f63234feefbf842..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/a672c82a45b96fa4ba05.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/a7d4a8aacd94444a3733.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/a7d4a8aacd94444a3733.json deleted file mode 100644 index 625b5112650b9a5c2c8c080baadb6ef938ce2d1c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/a7d4a8aacd94444a3733.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/b2a9cbdecc0759a41515.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/b2a9cbdecc0759a41515.json deleted file mode 100644 index 87bb05259d45a03258a4b15627a6fa1a2f7f9951..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/b2a9cbdecc0759a41515.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/b3f787f56882c465143b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/b3f787f56882c465143b.json deleted file mode 100644 index 7457e9bdde4307d5bed9c0fb5ac479ed4cff3646..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/b3f787f56882c465143b.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/378d998bfc96453d55eb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/378d998bfc96453d55eb.json deleted file mode 100644 index a61eb8a2aedd569775b9e8fe64dc7eef4ce81f8b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/378d998bfc96453d55eb.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/38c0ca6c81fdda8d77fb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/38c0ca6c81fdda8d77fb.json deleted file mode 100644 index b362d111f7157d4bc38b4eac9d03f2cfbb380a6b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/38c0ca6c81fdda8d77fb.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/9409f9fde65445832808.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/9409f9fde65445832808.json deleted file mode 100644 index ca9c182a6fcd1975303d87051db30646a6d4d860..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/9409f9fde65445832808.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/bded25644b8da99735a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/bded25644b8da99735a6.json deleted file mode 100644 index 9c2c5da39b262d260f417885b66809c9d2fa0124..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/bded25644b8da99735a6.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/1aad3a30f1bc9a6b16a1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/1aad3a30f1bc9a6b16a1.json deleted file mode 100644 index 674a7d42834166852ed69c12ce087e5a79828c3b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/1aad3a30f1bc9a6b16a1.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/69972da39f28d3f93b98.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/69972da39f28d3f93b98.json deleted file mode 100644 index bc638ecab340c9651a8d08e60d36ed0d169fc0a5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/69972da39f28d3f93b98.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6a2cd7c5aba4c5aeeaad.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6a2cd7c5aba4c5aeeaad.json deleted file mode 100644 index 1d8be6eb3e296fe1b5dddba87b4e923c777b52fe..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6a2cd7c5aba4c5aeeaad.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a907e73009e8eed0cf76.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a907e73009e8eed0cf76.json deleted file mode 100644 index ada6e1140a576b9b92401c5a897aa0310d2fd92d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a907e73009e8eed0cf76.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/ad0b74cc600d44ec371c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/ad0b74cc600d44ec371c.json deleted file mode 100644 index 95607c5a9181794e074b8e7f511e506422e80700..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/ad0b74cc600d44ec371c.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/dbcf6bb912a2bd22ffb1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/dbcf6bb912a2bd22ffb1.json deleted file mode 100644 index d0f5e5286e66791b315568a66becc9978498c23a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/dbcf6bb912a2bd22ffb1.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/f18273a93b5834ee6177.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/f18273a93b5834ee6177.json deleted file mode 100644 index 8d1fd9f4788bc0e64dbd1718fd5e0191ee8cb867..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/f18273a93b5834ee6177.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev4", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/116a92389d376ff63f7c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/116a92389d376ff63f7c.json deleted file mode 100644 index c7202c26a497c7a76a3b8999171fd906172d4776..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/116a92389d376ff63f7c.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/29e0cce01911ec3999ce.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/29e0cce01911ec3999ce.json deleted file mode 100644 index f760fd54c69fafa37db4b0433676e70f9dcf49f0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/29e0cce01911ec3999ce.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/4de40ad93fac20279d87.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/4de40ad93fac20279d87.json deleted file mode 100644 index eba03591353b84333e237bbca90b0e0a2bce99e6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/4de40ad93fac20279d87.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/9d03fa6a0328c735985d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/9d03fa6a0328c735985d.json deleted file mode 100644 index a39dd2ce77a38c0eec906e946ad4988bd8ef4a65..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/9d03fa6a0328c735985d.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/b3a1fba358c17db868cd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/b3a1fba358c17db868cd.json deleted file mode 100644 index 241210f23e0405ff535bcd7746493a24b3821674..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/b3a1fba358c17db868cd.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/78dff79e53744cb4b829.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/78dff79e53744cb4b829.json deleted file mode 100644 index e4fbc7b69084399fa4977cae8f6e2b24e258d328..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/78dff79e53744cb4b829.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/934014c9a49c2163c872.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/934014c9a49c2163c872.json deleted file mode 100644 index 0dbbec77e3aa1d90816a120de5b723fd0448e962..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/934014c9a49c2163c872.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/d6f43dcf5d5b18cce8cf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/d6f43dcf5d5b18cce8cf.json deleted file mode 100644 index 6b4ab2ecae27ce40d3b5f9ff36b1d1ccf8717c4b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/d6f43dcf5d5b18cce8cf.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/f05d67e0d3d25fba38b7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/f05d67e0d3d25fba38b7.json deleted file mode 100644 index f8dc33e58b70fd0ba41040476f4062d5f7cfcbe4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/f05d67e0d3d25fba38b7.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/fcf53621044a8a69cc96.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/fcf53621044a8a69cc96.json deleted file mode 100644 index 7850403fa79618a033faef6b141ec90b3d5241cc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-8b-instruct/fcf53621044a8a69cc96.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/17db81ce4beaefe62dcc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/17db81ce4beaefe62dcc.json deleted file mode 100644 index c9f5e1e47ef290d7b9b7c25b16c82549e5469c90..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/17db81ce4beaefe62dcc.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 2048, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json deleted file mode 100644 index 688bd6be0f8633f0d6ca1cad9dbbaba652ae68e5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json deleted file mode 100644 index 6386bcb91a634eb25c024e21f14c1feefbce542b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json deleted file mode 100644 index a3acb9b5b9afe4c1481f59f0077003a8feb76e77..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/09ee2891136721dd36ef.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/09ee2891136721dd36ef.json deleted file mode 100644 index 4af261d871c7a022eae650e354b51d70f2b79722..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/09ee2891136721dd36ef.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/2e1a92965c803915d392.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/2e1a92965c803915d392.json deleted file mode 100644 index 7e1717296d047799d33194b6234a47a9b64840e3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/2e1a92965c803915d392.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/3e8e2f592d3bfeb671e0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/3e8e2f592d3bfeb671e0.json deleted file mode 100644 index 0b4c615fc415f45156b08a1f32698f4f3f7900f7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/3e8e2f592d3bfeb671e0.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/796594eacef472f0ed8c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/796594eacef472f0ed8c.json deleted file mode 100644 index a5271eb8b6ea5a3389c1c74f34bc295a3dbe6949..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/796594eacef472f0ed8c.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7bdcbdcc76765672133c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7bdcbdcc76765672133c.json deleted file mode 100644 index 876eb469301403a62fd6c2b1a13f178e8eccc987..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7bdcbdcc76765672133c.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e53b3a8b0e8f289af987.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e53b3a8b0e8f289af987.json deleted file mode 100644 index 84653c6db0ee9b5e3a2e2062c9af48e04f62fac1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e53b3a8b0e8f289af987.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/f312c9c859f99c0882d5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/f312c9c859f99c0882d5.json deleted file mode 100644 index 99b21603a1ae40755fa767da63daab041a6a81b6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/f312c9c859f99c0882d5.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/0c38f1a1c11d2ca23067.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/0c38f1a1c11d2ca23067.json deleted file mode 100644 index 8b8bdb435ab88695a10bdb0a5135d3f16938eef9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/0c38f1a1c11d2ca23067.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json deleted file mode 100644 index 054a282284428a4d05af9ae4d6808806a7aab632..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json deleted file mode 100644 index b4d6243e5d4b8e3d9fb0d15be007b78f7087594c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/5cd8dc3fd87fbbb5bee6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/5cd8dc3fd87fbbb5bee6.json deleted file mode 100644 index 7fdd07bd374d7f5da5e783c14da2c82c20336a33..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/5cd8dc3fd87fbbb5bee6.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 131072, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 131072, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 131072, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/7fb3de63efc7f50b8c3e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/7fb3de63efc7f50b8c3e.json deleted file mode 100644 index 77831aa0c3a4a6c1269b734f8eedd4e0ffe5cad6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/7fb3de63efc7f50b8c3e.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/b4c26214cb4b9de35645.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/b4c26214cb4b9de35645.json deleted file mode 100644 index 41be769517c9937d61a171d6722e062842b647d1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/b4c26214cb4b9de35645.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/d970cf435a01c931891b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/d970cf435a01c931891b.json deleted file mode 100644 index 8ddd8456c15777b7d9f078e2585e326e83f5902f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/d970cf435a01c931891b.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json deleted file mode 100644 index dab7dde7fb0b6d9f6a796e2c4d9bd0b1449a0138..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 131072, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 131072, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 131072, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json deleted file mode 100644 index da020dfa113114e31fb18ef15a7f579b9887749e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/edad95801000b2eb5ff8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/edad95801000b2eb5ff8.json deleted file mode 100644 index 617bc7eaf3c72d0cef8a7359eca36759e8a14cde..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/edad95801000b2eb5ff8.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-2-13b-hf/ef167b545d2bc2a3ed7a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-2-13b-hf/ef167b545d2bc2a3ed7a.json deleted file mode 100644 index f55576e0ee1d94469019e06b2a0e94bebce7ada1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-2-13b-hf/ef167b545d2bc2a3ed7a.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 2048, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-2-7b-hf/bbdc1bd93e5bd60caa33.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-2-7b-hf/bbdc1bd93e5bd60caa33.json deleted file mode 100644 index ec8aa99fe7c03bdc571af74e0165749d0834b323..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-2-7b-hf/bbdc1bd93e5bd60caa33.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 2048, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-70B-Instruct/84f4ef2f2d5354fc95f9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-70B-Instruct/84f4ef2f2d5354fc95f9.json deleted file mode 100644 index 4f33f2637751a72adcbe8fb2afbe35fcc58462c2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-70B-Instruct/84f4ef2f2d5354fc95f9.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-70B-Instruct/8d7b7f00df6dd6383ce0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-70B-Instruct/8d7b7f00df6dd6383ce0.json deleted file mode 100644 index e5a5d637a184205215566eb986136842df9c67e2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-70B-Instruct/8d7b7f00df6dd6383ce0.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/0988b920dff9a2cb9736.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/0988b920dff9a2cb9736.json deleted file mode 100644 index 061d5602bf018696680671b6067dcd397e6ae8db..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/0988b920dff9a2cb9736.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 64, - "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", - "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/61853a0b31b294a846cd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/61853a0b31b294a846cd.json deleted file mode 100644 index 66689ec6f68575f64c41aa744aa3ef4e52c5e412..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/61853a0b31b294a846cd.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 64, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", - "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 64, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/8261b9be41682c346506.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/8261b9be41682c346506.json deleted file mode 100644 index fd1ee81050085ab51af0445495625da21156ccbe..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/8261b9be41682c346506.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", - "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B-Instruct/a21dff1f796befca42cc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B-Instruct/a21dff1f796befca42cc.json deleted file mode 100644 index e618799b3dc350ffc5bf05df0005a0f4a824c418..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B-Instruct/a21dff1f796befca42cc.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", - "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B/46603f8ace1dc81aad68.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B/46603f8ace1dc81aad68.json deleted file mode 100644 index 5bd7f4a311a34a2bc6a8a8867014c011fe57cf84..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B/46603f8ace1dc81aad68.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-3B/90cc44596fc79cf4184c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-3B/90cc44596fc79cf4184c.json deleted file mode 100644 index 818a6f3bd0deb56e8331909527429d91ce541ccf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-3B/90cc44596fc79cf4184c.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3-8B/d78bef341ddb013783b4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3-8B/d78bef341ddb013783b4.json deleted file mode 100644 index b06935edda042b26661584a23651dd5a8b389c1e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3-8B/d78bef341ddb013783b4.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/13befa443067554ab18b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/13befa443067554ab18b.json deleted file mode 100644 index 4f001fb0a4cac59f2bff8c93e402c753fa223021..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/13befa443067554ab18b.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/17dff1f29f1a953ad10c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/17dff1f29f1a953ad10c.json deleted file mode 100644 index 84cc257027ee284345a3f5d9d7000eaf1ddb64e6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/17dff1f29f1a953ad10c.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json deleted file mode 100644 index d74b1f6cad110aadc6f7ba864a4e56a05169fc94..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2078d8ec90ea4baba1a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2078d8ec90ea4baba1a8.json deleted file mode 100644 index 388766ca0187c289aac7af4623a2bcf36890c096..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2078d8ec90ea4baba1a8.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json deleted file mode 100644 index a321dd8afdf5bda01b0e16b81dc8a26ffa956888..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json deleted file mode 100644 index d38172b14ae7a489c79aaccac26fc3f196e86e9d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 64, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 64, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json deleted file mode 100644 index 9e80009b51724d4cae514746d961ea6203139c38..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json deleted file mode 100644 index 966875a25609e1452ad6022778b369cb4183b467..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/4d4f03eb5a0cba44e5ae.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/4d4f03eb5a0cba44e5ae.json deleted file mode 100644 index 112c46360155f3ef67ece56253c5d40d5be976de..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/4d4f03eb5a0cba44e5ae.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/61ec5ee35df13f5203e3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/61ec5ee35df13f5203e3.json deleted file mode 100644 index cbb04b68014d09b30996e4cb8edb19c939320090..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/61ec5ee35df13f5203e3.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json deleted file mode 100644 index 146b222d1439e631a573272f85c4ccd50d18e91b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/6f158c35bb4f130c7cce.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/6f158c35bb4f130c7cce.json deleted file mode 100644 index 8a47e130fb78fa989cac64e2bc476c04b4cfea8e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/6f158c35bb4f130c7cce.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json deleted file mode 100644 index 91fb853613704680bf9fb154cddef54f6100e7c0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/8a18cd985a54d9b376eb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/8a18cd985a54d9b376eb.json deleted file mode 100644 index e4f2c32d53f32b3b47ae77f8c0534bc24f477c0d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/8a18cd985a54d9b376eb.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json deleted file mode 100644 index 5c45d0daf631e4aba99856997a9a7b5c497ee565..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json deleted file mode 100644 index 1a68633a93ac094b5515b50dbf2a64d258e0faa2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/e525e95e9a1dc8c1f7b3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/e525e95e9a1dc8c1f7b3.json deleted file mode 100644 index 1875bc5175b1d9e1a6da7ee75d1d93055baa1a5b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/e525e95e9a1dc8c1f7b3.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/princeton-nlp/Sheared-LLaMA-1.3B/22c7daf708e1e1c7b302.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/princeton-nlp/Sheared-LLaMA-1.3B/22c7daf708e1e1c7b302.json deleted file mode 100644 index 9805e2c1c3607eacde7b3a9f1612a39b033384df..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/princeton-nlp/Sheared-LLaMA-1.3B/22c7daf708e1e1c7b302.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/princeton-nlp/Sheared-LLaMA-1.3B/ad41018b2ffc2f32cff5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/princeton-nlp/Sheared-LLaMA-1.3B/ad41018b2ffc2f32cff5.json deleted file mode 100644 index 71346df9d4c35c661ebb1d01dd28f2e57147d8a2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/princeton-nlp/Sheared-LLaMA-1.3B/ad41018b2ffc2f32cff5.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/01c51b5f669289b2eb04.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/01c51b5f669289b2eb04.json deleted file mode 100644 index 1aa6594271b8fda59471c29473dd49a2936512e1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/01c51b5f669289b2eb04.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 5, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f1cd1b9a81ce4544f9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f1cd1b9a81ce4544f9.json deleted file mode 100644 index 948aa8e93467d08020438fca6c433d2fac8eecac..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f1cd1b9a81ce4544f9.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0bdd17a350c28485d969.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0bdd17a350c28485d969.json deleted file mode 100644 index 25f453a6d16c427e489c7f51db7cd9d31c8a90f9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0bdd17a350c28485d969.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 5, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0f3fbabe5ed533277bf9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0f3fbabe5ed533277bf9.json deleted file mode 100644 index 009ed8368b9e52576ef542010e5123c472821b29..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0f3fbabe5ed533277bf9.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": null, - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 5, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json deleted file mode 100644 index 7a0150d747f7111bb7d09073bb9dd6f632b6de5c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/3023619cecc7f9cbaf9a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/3023619cecc7f9cbaf9a.json deleted file mode 100644 index 24b50cb006fe8c6d27d24e4532d105bf1ee0e27a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/3023619cecc7f9cbaf9a.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/33a6d4289f8b2eba4ff2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/33a6d4289f8b2eba4ff2.json deleted file mode 100644 index f6af62b36854f68acb3aeb56cc7103e3526aa930..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/33a6d4289f8b2eba4ff2.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/458eb7b3b111db07e053.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/458eb7b3b111db07e053.json deleted file mode 100644 index 8e03bd917935620fe9c6a218c5f7dbbee980ab25..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/458eb7b3b111db07e053.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/68b1648075a9c57bbbf0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/68b1648075a9c57bbbf0.json deleted file mode 100644 index 53fdf98f139b406dff01372eb53c6993b71beefb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/68b1648075a9c57bbbf0.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/ab9fe256f5b14c61d847.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/ab9fe256f5b14c61d847.json deleted file mode 100644 index 0bb70999a12776cd0fe1eab73c2f267e89a8f9b9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/ab9fe256f5b14c61d847.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": false, - "ctx_batch_size": 4, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 4, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/acb8373c9ac1d7f31f35.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/acb8373c9ac1d7f31f35.json deleted file mode 100644 index 56348a198558c57ab0e27bef05cd73c4ed920246..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/acb8373c9ac1d7f31f35.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/aef9dbaa8849e9c96f95.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/aef9dbaa8849e9c96f95.json deleted file mode 100644 index 2c732950a77e807e7548b4da8c2b945462d1c0f1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/aef9dbaa8849e9c96f95.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd751009690e75e22350.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd751009690e75e22350.json deleted file mode 100644 index 748845f910c5022dd9833990c6e1e213606c94c8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd751009690e75e22350.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd96975ba59ea098e5c6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd96975ba59ea098e5c6.json deleted file mode 100644 index 1f4732a7b49fb7cf527f73c032c204dbf0d3ef06..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd96975ba59ea098e5c6.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "ctx_batch_size": 1, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 4, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e8f7b8c083bc968773ae.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e8f7b8c083bc968773ae.json deleted file mode 100644 index f576242fe9412b05009d4bde8e65f29eb49f6a79..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e8f7b8c083bc968773ae.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f7b6a3b0f3b1c18b5df8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f7b6a3b0f3b1c18b5df8.json deleted file mode 100644 index 289a05142c5150167add32bb376d6738a0fe7d43..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f7b6a3b0f3b1c18b5df8.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": null, - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fb7899d27ccbc59330a2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fb7899d27ccbc59330a2.json deleted file mode 100644 index 9a8ee57db4ad994ab65d7e1539f38113be4f1e0c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fb7899d27ccbc59330a2.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json deleted file mode 100644 index bc0980857d8726571921e874572072ab5fa5ce37..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "ctx_batch_size": 4, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "tkg_batch_size": 4, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/f96bf36952a158cc9e11.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/f96bf36952a158cc9e11.json deleted file mode 100644 index 903e5cddf913d527d7b6eb450e842236caf5bdb6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/f96bf36952a158cc9e11.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 32768, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "checkpoint_revision": "41bd4c9e7e4fb318ca40e721131d4933966c2cc1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 16, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev5", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 16, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 1000000.0, - "router_aux_loss_coef": 0.02, - "router_jitter_noise": 0.0, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/4b68c27fbfeeaf5de8e2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/4b68c27fbfeeaf5de8e2.json deleted file mode 100644 index e2e981f30cd8a02b352820793bc7936c8affd9a8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/4b68c27fbfeeaf5de8e2.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/d448cc693abaa936183b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/d448cc693abaa936183b.json deleted file mode 100644 index 9ef8385aa23fe097c3a97b83ffc8ed33af523eab..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/d448cc693abaa936183b.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/phi-4/32f96d29624b37ea5bb0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/phi-4/32f96d29624b37ea5bb0.json deleted file mode 100644 index 1b0ca01623da2afcf422a359f39f8c3735b16575..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/phi-4/32f96d29624b37ea5bb0.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/phi-4/333785138bc8cb9ba0f8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/phi-4/333785138bc8cb9ba0f8.json deleted file mode 100644 index 3ffe3824da25fe8626a5b6bef354f0d9af8a55e4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/phi-4/333785138bc8cb9ba0f8.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/442d198b468f7347f4bf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/442d198b468f7347f4bf.json deleted file mode 100644 index 7203ec938a266339c7d7eb7038e3f14695c5e1c8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/442d198b468f7347f4bf.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/882a33b849f12cb2ddc6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/882a33b849f12cb2ddc6.json deleted file mode 100644 index 78719aa36e313243ca9da63ba8b9867c14cc77b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/882a33b849f12cb2ddc6.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/b752b9c4c49cbb36b712.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/b752b9c4c49cbb36b712.json deleted file mode 100644 index 46cc725cbe57f312fa9af1ccc3ad30f600bc9cdc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/b752b9c4c49cbb36b712.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-1.5B/0df9c95dcbe4cc0671ec.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-1.5B/0df9c95dcbe4cc0671ec.json deleted file mode 100644 index 05e739937076cde7d226f72f6ce5f6f21c69fef4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-1.5B/0df9c95dcbe4cc0671ec.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-1.5B/f6acf5b4db984618169a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-1.5B/f6acf5b4db984618169a.json deleted file mode 100644 index 1d3a3b0ef52b06fe42c831aa2bb270a12afb1018..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-1.5B/f6acf5b4db984618169a.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-14B/892e55dd56c23e791e50.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-14B/892e55dd56c23e791e50.json deleted file mode 100644 index 31d28139567427f8e1189a24b5a391e567c2f7b5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-14B/892e55dd56c23e791e50.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-14B/91e86e48829c73c500be.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-14B/91e86e48829c73c500be.json deleted file mode 100644 index 7f22157a5f2ec2293bdc23d62ed7128dbb129bf0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-14B/91e86e48829c73c500be.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-32B-Instruct/bcdeafe2836b415ea078.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-32B-Instruct/bcdeafe2836b415ea078.json deleted file mode 100644 index d27e70646356417b31f8ce675c55c616227d450d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-32B-Instruct/bcdeafe2836b415ea078.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-32B-Instruct/d5dc4b6bb8befed478d9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-32B-Instruct/d5dc4b6bb8befed478d9.json deleted file mode 100644 index 6d06bfc12e6f70f12816a9e3d569b2ce208d52eb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-32B-Instruct/d5dc4b6bb8befed478d9.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-72B-Instruct/49df3fba7fb3a21caba1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-72B-Instruct/49df3fba7fb3a21caba1.json deleted file mode 100644 index 0cd43a63ee3c66ef9e23cc58470220f9f4a39efa..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-72B-Instruct/49df3fba7fb3a21caba1.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/35de1441a23ffe0a85e0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/35de1441a23ffe0a85e0.json deleted file mode 100644 index e8e33fdc1bc83a3b9b630381e4be817e2e62ac12..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/35de1441a23ffe0a85e0.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/4843b179ce014e6bccfa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/4843b179ce014e6bccfa.json deleted file mode 100644 index 2d03cf0b2a56153463d2ec624fe1569af3878fc5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/4843b179ce014e6bccfa.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/8b9dbd7147fba02a080f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/8b9dbd7147fba02a080f.json deleted file mode 100644 index 52eb548771e85f7ec9081a8cf80953d684b16dca..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/8b9dbd7147fba02a080f.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/963163e5d5c769cba622.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/963163e5d5c769cba622.json deleted file mode 100644 index 981a63ca7c0acf0558d411f5699623051200fdad..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/963163e5d5c769cba622.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/da16f722450a1545fc86.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/da16f722450a1545fc86.json deleted file mode 100644 index 7e0168a7a2ca825afde3b64dad7baf4ab2145869..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-7B-Instruct/da16f722450a1545fc86.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/4e60a69b6d9e2b3915f9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/4e60a69b6d9e2b3915f9.json deleted file mode 100644 index c30dd35282195b8fed8de9becb068ed23b9a2e3f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/4e60a69b6d9e2b3915f9.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/8fcb82af4cdab6542ff8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/8fcb82af4cdab6542ff8.json deleted file mode 100644 index ca42047349c88e92eb43c2d1b45b93a9997860c1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/8fcb82af4cdab6542ff8.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/5e2178097415394dea10.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/5e2178097415394dea10.json deleted file mode 100644 index b888cbc33524497265f237ee913b8f7090d70236..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/5e2178097415394dea10.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/c0f8f78fadc4ee5b5baa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/c0f8f78fadc4ee5b5baa.json deleted file mode 100644 index ccd95ddbcc7ce4e872e0cb7ec02c56949f084849..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/c0f8f78fadc4ee5b5baa.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/1228700351e835898348.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/1228700351e835898348.json deleted file mode 100644 index 8d30c916ba6438cc0d3e2d715f325296644d31c2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/1228700351e835898348.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/54ee8c6fc6fb26a7342e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/54ee8c6fc6fb26a7342e.json deleted file mode 100644 index 8e8baed42cd3a7c8d4a53709c61fc90e60fe6bfe..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/54ee8c6fc6fb26a7342e.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/374079231f190a75af09.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/374079231f190a75af09.json deleted file mode 100644 index 72d3f5414d4ef6b7881ab79aaf87b8997b9e1914..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/374079231f190a75af09.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4c74c19ca533d3650982.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4c74c19ca533d3650982.json deleted file mode 100644 index 1e74b048d33adbbeded0ced28c6d260d4d13d5c4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4c74c19ca533d3650982.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/79552f8f3add20ca21db.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/79552f8f3add20ca21db.json deleted file mode 100644 index 8b30cbfbaa2086334e30214af046107f4a367d17..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/79552f8f3add20ca21db.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/79be474d77b6ec4da5fa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/79be474d77b6ec4da5fa.json deleted file mode 100644 index 1e85da097ee46a7e926a95a67da3b97dff7cb56b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/79be474d77b6ec4da5fa.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/b35fc31901d4189fa831.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/b35fc31901d4189fa831.json deleted file mode 100644 index 85a683f5cef47fa91d5f0484e60a6eea6f0b4204..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/b35fc31901d4189fa831.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev5", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/63cbe639855d1ca07436.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/63cbe639855d1ca07436.json deleted file mode 100644 index 237446904aa01dc1157dd171c877df7a5a98c22e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/63cbe639855d1ca07436.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/ac838c5c1192d3a4e32a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/ac838c5c1192d3a4e32a.json deleted file mode 100644 index b888fc690e730f1f7603629fc7967b5f1a109e37..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/ac838c5c1192d3a4e32a.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/c97bbc53e2a9f6127f7a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/c97bbc53e2a9f6127f7a.json deleted file mode 100644 index 5f6427e0bdb5df7a25d5d4cc4396ecf0ef5cf1c7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/c97bbc53e2a9f6127f7a.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/e86f136b19f0cadd4390.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/e86f136b19f0cadd4390.json deleted file mode 100644 index ece35d5b736b4d5a53b2a9b734f87668b4e27d56..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/e86f136b19f0cadd4390.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/f9859c21d0108182bb68.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/f9859c21d0108182bb68.json deleted file mode 100644 index 585320002451e5bde69963a89f273b158be47fb4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-2b-instruct/f9859c21d0108182bb68.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/13d0afd9b5b23f0ae025.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/13d0afd9b5b23f0ae025.json deleted file mode 100644 index 34ead3b2e08191f1d949b8f437179de5a31c5927..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/13d0afd9b5b23f0ae025.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/27c5dd63173c995f2f5b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/27c5dd63173c995f2f5b.json deleted file mode 100644 index bc9dce0a506b285c8ee843e21d1f9792a539b825..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/27c5dd63173c995f2f5b.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/9b02503e4de6c83c0f72.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/9b02503e4de6c83c0f72.json deleted file mode 100644 index bdb91d1eb302d8acdb9437c86dc17cadc2689ae9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/9b02503e4de6c83c0f72.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/dfb81faa493569fde63e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/dfb81faa493569fde63e.json deleted file mode 100644 index e6d910843a9520d2031ec75d67c8bdc5121f1de4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/dfb81faa493569fde63e.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/f067751cb760f9394422.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/f067751cb760f9394422.json deleted file mode 100644 index 7b4c38d4938665f35415121350871f0bdb497bf0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/ibm-granite/granite-3.1-8b-instruct/f067751cb760f9394422.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/9790d481e3a3e01abad0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/9790d481e3a3e01abad0.json deleted file mode 100644 index 7acd62789a170e905c8487fae354905c9b1706cf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/9790d481e3a3e01abad0.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 2048, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/0759bfb42a67ce6bdb6b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/0759bfb42a67ce6bdb6b.json deleted file mode 100644 index 6d0f804e66e225197cbd65e6d15ca3f37f935170..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/0759bfb42a67ce6bdb6b.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/0cc396f651c85ae92229.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/0cc396f651c85ae92229.json deleted file mode 100644 index 64ec2e43543fbe38a3c34676c4ce1b6837d2dd65..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/0cc396f651c85ae92229.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8c8067a871ad155429de.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8c8067a871ad155429de.json deleted file mode 100644 index 65c2d9ac1063538a9e7e107b6f1504774dcfbca6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8c8067a871ad155429de.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/9809764cbcdce158aea7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/9809764cbcdce158aea7.json deleted file mode 100644 index 301f8bb7e24bacfd44e5c2318cda9e71690080dc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/9809764cbcdce158aea7.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/9bb25978a1a70c2fc39f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/9bb25978a1a70c2fc39f.json deleted file mode 100644 index ae9aeb8d12034db10f405928ff34c7e6a64ad31c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/9bb25978a1a70c2fc39f.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/ad42d7f829b48f44b83a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/ad42d7f829b48f44b83a.json deleted file mode 100644 index d2053092b9f63516c7737b1207716adce39ea07e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/ad42d7f829b48f44b83a.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b285877d6cf867d0af03.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b285877d6cf867d0af03.json deleted file mode 100644 index 0cbd10e7973cbaf0f787411b789d42dc91cea305..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b285877d6cf867d0af03.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-2-13b-hf/8f6646a5ae93554f9f95.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-2-13b-hf/8f6646a5ae93554f9f95.json deleted file mode 100644 index 59a3efc1180f9298d261f90aedaee49233048b1b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-2-13b-hf/8f6646a5ae93554f9f95.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 2048, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-2-7b-hf/1408a5093d34990ff777.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-2-7b-hf/1408a5093d34990ff777.json deleted file mode 100644 index c81cd11e93e7d2c6ab2d9447bf141cb7813e6b5d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-2-7b-hf/1408a5093d34990ff777.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 2048, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.1-70B-Instruct/7386f77d576ce789e108.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.1-70B-Instruct/7386f77d576ce789e108.json deleted file mode 100644 index 2ebe74f14ab13a593216956c6f46fa3371af6356..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.1-70B-Instruct/7386f77d576ce789e108.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.1-70B-Instruct/75eb1f7c792bc806438f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.1-70B-Instruct/75eb1f7c792bc806438f.json deleted file mode 100644 index 351c88d1598ba1898f66be39b40924c70da063a9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.1-70B-Instruct/75eb1f7c792bc806438f.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.2-1B/45dfda5c37fc1ead02b5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.2-1B/45dfda5c37fc1ead02b5.json deleted file mode 100644 index 9b0b47d96cc2df91612a946e71d6866339249f34..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.2-1B/45dfda5c37fc1ead02b5.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.2-3B/6dba79fe28931d6e1fb5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.2-3B/6dba79fe28931d6e1fb5.json deleted file mode 100644 index 7f103c5b829c7a50076aaf5d1552ef965e2bce16..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Llama-3.2-3B/6dba79fe28931d6e1fb5.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3-8B/b8189cf68d5782250668.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3-8B/b8189cf68d5782250668.json deleted file mode 100644 index a17c732e7394d8f7f1ae41d964a51d73f8bf02ac..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3-8B/b8189cf68d5782250668.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/0ad6c4353120bc406405.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/0ad6c4353120bc406405.json deleted file mode 100644 index acaa64aab6b4ad46a50471dad5348f02e9f0fbe3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/0ad6c4353120bc406405.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/2181b9a353706d5fcc6a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/2181b9a353706d5fcc6a.json deleted file mode 100644 index bb703e07a05a75937adc8d1b1488d85eb1486050..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/2181b9a353706d5fcc6a.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/b98382d7e285af6654fb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/b98382d7e285af6654fb.json deleted file mode 100644 index c9bf162673eb5386e44b185bcb4f61187d233599..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/b98382d7e285af6654fb.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/cb5308b8c38922482b6a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/cb5308b8c38922482b6a.json deleted file mode 100644 index 1caac6aef256a19fc548954957ab59448c4451f7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/cb5308b8c38922482b6a.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/d310bc9c73bee7993aee.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/d310bc9c73bee7993aee.json deleted file mode 100644 index 351eefcbfa55a871ae59adfaa9a1750272e7e684..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/d310bc9c73bee7993aee.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/d3d0dcc99030d5e66aea.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/d3d0dcc99030d5e66aea.json deleted file mode 100644 index ed835639a3aa5b20befa95756bea7d0c2a97335c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/d3d0dcc99030d5e66aea.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/d83b1df35206c627ba2c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/d83b1df35206c627ba2c.json deleted file mode 100644 index fe278e0378a6f65468a510c3043e39625512b57c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/meta-llama/Meta-Llama-3.1-8B/d83b1df35206c627ba2c.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/princeton-nlp/Sheared-LLaMA-1.3B/86ef540d1344e310fe24.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/princeton-nlp/Sheared-LLaMA-1.3B/86ef540d1344e310fe24.json deleted file mode 100644 index 23cc3827089679cb9ffe2be04d0ae249519a8879..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/princeton-nlp/Sheared-LLaMA-1.3B/86ef540d1344e310fe24.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/princeton-nlp/Sheared-LLaMA-1.3B/8dd9f1c6561a5ab42b7f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/princeton-nlp/Sheared-LLaMA-1.3B/8dd9f1c6561a5ab42b7f.json deleted file mode 100644 index 58412a5b3016a13fb4baedea1feaddfd999e7e84..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/princeton-nlp/Sheared-LLaMA-1.3B/8dd9f1c6561a5ab42b7f.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/05f1635334b6beb06d01.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/05f1635334b6beb06d01.json deleted file mode 100644 index d14d15205f40c119c4d67db43d2d9ea98ff564f2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/05f1635334b6beb06d01.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev6", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/0652c1c39ac08c855ad8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/0652c1c39ac08c855ad8.json deleted file mode 100644 index 88d03e84808e7c675032a7b80e7efc6b9f5b59c0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/0652c1c39ac08c855ad8.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev6", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/d089f0da86f10a7685d3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/d089f0da86f10a7685d3.json deleted file mode 100644 index 3fbb2f6ee8fa749e9ca965bc33a1a454a0960736..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/d089f0da86f10a7685d3.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev6", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 5, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/Phi-3-mini-4k-instruct/4f532fbd378d1134587c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/Phi-3-mini-4k-instruct/4f532fbd378d1134587c.json deleted file mode 100644 index 5eb94130e93931c0e2025ec1137e110c4108cfe2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/Phi-3-mini-4k-instruct/4f532fbd378d1134587c.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/Phi-3-mini-4k-instruct/90c30bb5a6fed4f035d3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/Phi-3-mini-4k-instruct/90c30bb5a6fed4f035d3.json deleted file mode 100644 index 14680638ee7b7115c24e702892366025d14ecab2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/Phi-3-mini-4k-instruct/90c30bb5a6fed4f035d3.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/phi-4/23e5e7ba06e9034ff143.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/phi-4/23e5e7ba06e9034ff143.json deleted file mode 100644 index 5a2df57dd22f3461c136019f520475b4cdec1e30..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/phi-4/23e5e7ba06e9034ff143.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/phi-4/a8d462d56035064c576f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/phi-4/a8d462d56035064c576f.json deleted file mode 100644 index 34a5f5991a07782a529e60b1ea0682669f2a8706..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/microsoft/phi-4/a8d462d56035064c576f.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-0.5B/2cef8a43e294132ccb80.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-0.5B/2cef8a43e294132ccb80.json deleted file mode 100644 index cc416df229ee6652e5fceec20514e238a7a46c53..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-0.5B/2cef8a43e294132ccb80.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-0.5B/3b6da9895db60e9899a0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-0.5B/3b6da9895db60e9899a0.json deleted file mode 100644 index 73312ed152537153025dc6cccfef0df3e37b2a20..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-0.5B/3b6da9895db60e9899a0.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-1.5B/89eb5a25b3cea125b59d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-1.5B/89eb5a25b3cea125b59d.json deleted file mode 100644 index 1ffc6b096bb4b5b97a5094905234494a7a0f93eb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-1.5B/89eb5a25b3cea125b59d.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-1.5B/ee6191be7d06d628c469.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-1.5B/ee6191be7d06d628c469.json deleted file mode 100644 index 641a5b29d148591ded51c876bef8d258db5ac570..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-1.5B/ee6191be7d06d628c469.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-14B/2244662df4c14ecb471e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-14B/2244662df4c14ecb471e.json deleted file mode 100644 index fa3ffc589bcc50f861a99cd887573a6bad55d404..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-14B/2244662df4c14ecb471e.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-14B/878cbeedff8739902f2a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-14B/878cbeedff8739902f2a.json deleted file mode 100644 index 9ea3a885730554f9d95519d463c6f63b5a147821..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-14B/878cbeedff8739902f2a.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-32B-Instruct/198f34647957b7e830e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-32B-Instruct/198f34647957b7e830e8.json deleted file mode 100644 index 138fb5455317ffa3f73eae85b25b3c8fdb9577b7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-32B-Instruct/198f34647957b7e830e8.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-32B-Instruct/9bdc2c10c6e78a0bcede.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-32B-Instruct/9bdc2c10c6e78a0bcede.json deleted file mode 100644 index 047187022f86dd7fed1d4a824379493eecde4a27..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-32B-Instruct/9bdc2c10c6e78a0bcede.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-72B-Instruct/8f53b33f773660feba62.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-72B-Instruct/8f53b33f773660feba62.json deleted file mode 100644 index adfda3042cf2f65b750895602dc019b8503894f1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-72B-Instruct/8f53b33f773660feba62.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/6baaa366c1d579394d6f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/6baaa366c1d579394d6f.json deleted file mode 100644 index 0d7dddcfd7fc5f23c00b3081e48ef6a3ccd0447a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/6baaa366c1d579394d6f.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/9589e206b0dfc8a5190c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/9589e206b0dfc8a5190c.json deleted file mode 100644 index b686c60c64c725536c174b7fde0cd3c6c96f1352..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/9589e206b0dfc8a5190c.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/cd038ed0758d954e503d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/cd038ed0758d954e503d.json deleted file mode 100644 index 7b7dff0e4bd0807612c9251202ae4952ec3a534d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/cd038ed0758d954e503d.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/d70bb4430d541ac2b9c8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/d70bb4430d541ac2b9c8.json deleted file mode 100644 index 1ab242e4b36600c76936b62a880563fae23faaa5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/d70bb4430d541ac2b9c8.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/fb8ca325adcb5496bd9b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/fb8ca325adcb5496bd9b.json deleted file mode 100644 index 8ec4fda56fe01efd7e3614a279b9836a8b091480..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/Qwen/Qwen2.5-7B-Instruct/fb8ca325adcb5496bd9b.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/44e579ffaf4767b3a299.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/44e579ffaf4767b3a299.json deleted file mode 100644 index d1df29aeca79bae72199d900d77055547ac3e66d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/44e579ffaf4767b3a299.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/d025194c936e0a753b35.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/d025194c936e0a753b35.json deleted file mode 100644 index f388db7b5401e26cc287f9ddabf2567b686ad6a5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/d025194c936e0a753b35.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/3890fa3901a57a90810a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/3890fa3901a57a90810a.json deleted file mode 100644 index 1231b134134d8d31c0406e8b48393258c6b2b255..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/3890fa3901a57a90810a.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/964de8a7159826d20655.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/964de8a7159826d20655.json deleted file mode 100644 index e7b9e1e5574c82bb667720443713a5bd4f71eda1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/964de8a7159826d20655.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/104a0de4667247359e2b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/104a0de4667247359e2b.json deleted file mode 100644 index 73b2706ef063024d20e083d9fcccace32d4a268e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/104a0de4667247359e2b.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/75070fc067762e58dfec.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/75070fc067762e58dfec.json deleted file mode 100644 index 8464f471e9f1b4c22b9e28e786c1df10b5b401f1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/75070fc067762e58dfec.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/3dc24f81ce81cada0a2b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/3dc24f81ce81cada0a2b.json deleted file mode 100644 index 7e59321214689d4c1d620610130391355f9502ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/3dc24f81ce81cada0a2b.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/86097cb6b43aecf90e8d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/86097cb6b43aecf90e8d.json deleted file mode 100644 index b112d294f9fed652e6c717a56a1c4508f15748b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/86097cb6b43aecf90e8d.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a5d09d1742362a012b3b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a5d09d1742362a012b3b.json deleted file mode 100644 index d31f9e9665a12b3cad6da1ca3a9d2277c72fbbc6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a5d09d1742362a012b3b.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/d0c899dc3f5ef49f46d5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/d0c899dc3f5ef49f46d5.json deleted file mode 100644 index bc814b68293bc0dc3eaf4bcd5a8f229579dfa571..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/d0c899dc3f5ef49f46d5.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/f5dcbb24cb4ca044b93c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/f5dcbb24cb4ca044b93c.json deleted file mode 100644 index 49384efb0c9075d413e1bae92c876b70d3ac0d3c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/f5dcbb24cb4ca044b93c.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev6", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/46592604b62f7ce89082.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/46592604b62f7ce89082.json deleted file mode 100644 index eae69da7ad4256a65ed10d9aa0cf6705c2cb1b00..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/46592604b62f7ce89082.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6517782804be0be5e2a4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6517782804be0be5e2a4.json deleted file mode 100644 index 2ba8d4d3e1492c85162a91cf6b7ebcff0c5e9bd2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6517782804be0be5e2a4.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/bb0a40e5cbe4d1c25285.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/bb0a40e5cbe4d1c25285.json deleted file mode 100644 index 36bcccb268f07104571d19cc3200d4535b06bf76..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/bb0a40e5cbe4d1c25285.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/042bd31b8692565be2d8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/042bd31b8692565be2d8.json deleted file mode 100644 index 9f348b8e47b71f15f201b31dd0e1440d52c1b4d8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/042bd31b8692565be2d8.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/0c55cea89712c277017d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/0c55cea89712c277017d.json deleted file mode 100644 index ddcde7407b1eb93cd03e934f8dfa68a45b5205f7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/0c55cea89712c277017d.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/e67620f86b447624fd17.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/e67620f86b447624fd17.json deleted file mode 100644 index 460cdf56f11419865a2a3453256710ffd93a9746..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/e67620f86b447624fd17.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/f7e5c0a43dd13b8e0971.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/f7e5c0a43dd13b8e0971.json deleted file mode 100644 index 2f7f101d185b9c30bd3b3b430c058712a6177c61..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/f7e5c0a43dd13b8e0971.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/f833ea81ae38907e2d4e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/f833ea81ae38907e2d4e.json deleted file mode 100644 index a520870593c34f2356d94d538b9248c8099d5ed3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/f833ea81ae38907e2d4e.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/11470d279e6abc1fc4e7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/11470d279e6abc1fc4e7.json deleted file mode 100644 index a47cf9aa6cd95e556284bb3df29b6474a358701b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/11470d279e6abc1fc4e7.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/35cb39e39c5ce991caf2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/35cb39e39c5ce991caf2.json deleted file mode 100644 index 5275f731e9aa4253ddfa62eafaeb4d66070ff89f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/35cb39e39c5ce991caf2.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/b76d4e1eb06bba95f5b7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/b76d4e1eb06bba95f5b7.json deleted file mode 100644 index e3237e3641fd017f9564b50146326e3c928cc738..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/b76d4e1eb06bba95f5b7.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/e77b3dfb32d82a78797e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/e77b3dfb32d82a78797e.json deleted file mode 100644 index 6a9dbddd8fc6456ee19001f23603be10b5b6ffe5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/e77b3dfb32d82a78797e.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/ef919a078ee21f4b278f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/ef919a078ee21f4b278f.json deleted file mode 100644 index 82da1185bd9e9522d0118e8df4782ea2e2691061..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-8b-instruct/ef919a078ee21f4b278f.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/aba3a4fb5110b2624663.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/aba3a4fb5110b2624663.json deleted file mode 100644 index aacffcf9f6db99c4e14609bdd44025c4672dd3be..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/aba3a4fb5110b2624663.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/16eb66e6f195b2f2f3b4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/16eb66e6f195b2f2f3b4.json deleted file mode 100644 index 8446ce90e2d9ad4f5ba75da7afe2ac084850f856..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/16eb66e6f195b2f2f3b4.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/88744e5aa5d753d1f538.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/88744e5aa5d753d1f538.json deleted file mode 100644 index 76ae49d01bebe6516cd8afbf3a20ea25af47e7e0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/88744e5aa5d753d1f538.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8a3e1095e7ec1ddfb65b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8a3e1095e7ec1ddfb65b.json deleted file mode 100644 index 0b5add12a90a30edaa437560d4ba85af17395f85..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8a3e1095e7ec1ddfb65b.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/06f801ef71308c7000ea.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/06f801ef71308c7000ea.json deleted file mode 100644 index 80dfbdf5a7be8c7fefd16e66a8c19b1437ce31bf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/06f801ef71308c7000ea.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1be46ac7c7fbd3ee3e56.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1be46ac7c7fbd3ee3e56.json deleted file mode 100644 index 39123673324a9c1cc3916659db1f6158490d25b4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1be46ac7c7fbd3ee3e56.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/612a7e02ca27512e7226.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/612a7e02ca27512e7226.json deleted file mode 100644 index 7cfac9847d669370567f78c632b2261886c92c6b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/612a7e02ca27512e7226.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7dbefa3cf5f273f605a5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7dbefa3cf5f273f605a5.json deleted file mode 100644 index e282857b8a96e9ffe95265e45f75a4ee7552cd91..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7dbefa3cf5f273f605a5.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/df089376c4fbeee386dc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/df089376c4fbeee386dc.json deleted file mode 100644 index 6dca3ad7271ebdf42f3af76827b296e278500323..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/df089376c4fbeee386dc.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e012a70926ce7c006be7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e012a70926ce7c006be7.json deleted file mode 100644 index 2012d7c56bd30c933d85ddc1512c5da9a9ad751a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e012a70926ce7c006be7.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/007ed0c0cab705897799.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/007ed0c0cab705897799.json deleted file mode 100644 index d4416655bcbd318db1237accb344135b6c21db34..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/007ed0c0cab705897799.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/021f11fdefa8c3b516bd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/021f11fdefa8c3b516bd.json deleted file mode 100644 index 46fcd0ba4590aa5c04e1aee016fb94538a7ac151..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/021f11fdefa8c3b516bd.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 128, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 128, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 128, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1913cf3041a0fe975f3c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1913cf3041a0fe975f3c.json deleted file mode 100644 index 13908f95369b0a43ba1245d72030264e26eac9eb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1913cf3041a0fe975f3c.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 512, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 512, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 512, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1ddb68657f4e9b80fbca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1ddb68657f4e9b80fbca.json deleted file mode 100644 index b0348e42c046419cb685e7709928ba82ccb0874d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1ddb68657f4e9b80fbca.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/50411def20a2b703209e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/50411def20a2b703209e.json deleted file mode 100644 index f3554ea054eabc9b92261f56bb8563a7a2d3b033..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/50411def20a2b703209e.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 1, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 512, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 512, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 512, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 1, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7029722448f7c89cc06f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7029722448f7c89cc06f.json deleted file mode 100644 index 9b0892f86c180eff37da12be6c5f7d56a1e690ba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7029722448f7c89cc06f.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/788f593b3a42ce567731.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/788f593b3a42ce567731.json deleted file mode 100644 index dc88da36e7c94ede4c92abd93b1c1d5dd2ecc448..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/788f593b3a42ce567731.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7bca5f2b6f1034c1fa71.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7bca5f2b6f1034c1fa71.json deleted file mode 100644 index 20123ccdd84691c6826582391f2c6629aa106085..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7bca5f2b6f1034c1fa71.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 1, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 128, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 128, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 128, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 1, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/892d8ca0f2425da9c03b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/892d8ca0f2425da9c03b.json deleted file mode 100644 index d94960882dd600f2043a129f00aeeb710ddd76ed..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/892d8ca0f2425da9c03b.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a1adaee75c9e8cc04831.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a1adaee75c9e8cc04831.json deleted file mode 100644 index 64f1a44056cf48874b93f4e1e42748fee6f59bdc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a1adaee75c9e8cc04831.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a2e466575a68f3e72707.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a2e466575a68f3e72707.json deleted file mode 100644 index f3c6270414dee6923a324e759e8d4d946d5092ec..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a2e466575a68f3e72707.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 131072, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 131072, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 131072, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a7608b4dc11bf5302b4b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a7608b4dc11bf5302b4b.json deleted file mode 100644 index dc044183dea94d359d50f6e661e0b174fbc1ffba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a7608b4dc11bf5302b4b.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 131072, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/bfc3e6553a3d02bc5c75.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/bfc3e6553a3d02bc5c75.json deleted file mode 100644 index ab0080bb057c590e885279cc1a02810dee516109..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/bfc3e6553a3d02bc5c75.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 128, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 128, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 128, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/ce0df42e903c9a49fa72.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/ce0df42e903c9a49fa72.json deleted file mode 100644 index e6668f0b9ff19c3195df1f34c5e84579a9a44f49..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/ce0df42e903c9a49fa72.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 1024, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 1024, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 1024, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-2-13b-hf/7c3d63b4aa2505b36bae.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-2-13b-hf/7c3d63b4aa2505b36bae.json deleted file mode 100644 index a465ff08a8f13b9ca2190070b1ff9bd62cd47d55..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-2-13b-hf/7c3d63b4aa2505b36bae.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-2-7b-hf/a3d0fdfd44e634ee676e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-2-7b-hf/a3d0fdfd44e634ee676e.json deleted file mode 100644 index 3585e59e44ec8099547ac584006f38e20fcb31d7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-2-7b-hf/a3d0fdfd44e634ee676e.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.1-70B-Instruct/11acbd57ae2875970055.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.1-70B-Instruct/11acbd57ae2875970055.json deleted file mode 100644 index 356f42576abbfd32309d501e398f4588571bf1d4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.1-70B-Instruct/11acbd57ae2875970055.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.1-70B-Instruct/24d872e690c041569e19.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.1-70B-Instruct/24d872e690c041569e19.json deleted file mode 100644 index 30e4cec887541ee48ae302f3a2a9f5bf08810498..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.1-70B-Instruct/24d872e690c041569e19.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.1-8B-Instruct/d04674b7e4428d46a038.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.1-8B-Instruct/d04674b7e4428d46a038.json deleted file mode 100644 index ca7d266b9e12283cae58b107fc1f509164927600..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.1-8B-Instruct/d04674b7e4428d46a038.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", - "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/34beaf9835b09fe53395.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/34beaf9835b09fe53395.json deleted file mode 100644 index 208b41849b5dbc24cf502a9a430a656f10fed302..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/34beaf9835b09fe53395.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/ab418e732245677e7cd5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/ab418e732245677e7cd5.json deleted file mode 100644 index 5d49fd922a23b75aaa3fb8a5471d5baa9db722b8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/ab418e732245677e7cd5.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", - "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/d1ea2689244397f649b7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/d1ea2689244397f649b7.json deleted file mode 100644 index 8ef036b0f08d405f2fa040a87f17b1ec8a908a15..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/d1ea2689244397f649b7.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", - "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B/12cfb7a14805c78317ea.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B/12cfb7a14805c78317ea.json deleted file mode 100644 index ffd31c6e89c3a7d0b171c96220bcae50ec73a4cc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B/12cfb7a14805c78317ea.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-3B/d3e98937d2351cff253c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-3B/d3e98937d2351cff253c.json deleted file mode 100644 index cf209ece49e773c15241df084584d8520fd12d9e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-3B/d3e98937d2351cff253c.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3-8B/d125ea3d9efee0626398.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3-8B/d125ea3d9efee0626398.json deleted file mode 100644 index 34a02cadbc2582b91a1aa9021dfd82a67e2a14f9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3-8B/d125ea3d9efee0626398.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/0965710a4cf6efb409a9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/0965710a4cf6efb409a9.json deleted file mode 100644 index 2c6b477a3706aca6b5924ceeecba98875f1b9813..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/0965710a4cf6efb409a9.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/1ff79c8ecc5f477f3b63.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/1ff79c8ecc5f477f3b63.json deleted file mode 100644 index c315d43225c411eb0cf986fdc1c62fe531678b29..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/1ff79c8ecc5f477f3b63.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/413e4acf756fa03230b7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/413e4acf756fa03230b7.json deleted file mode 100644 index bc1d36832abb4af862f95dba9bda30ec8764f93a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/413e4acf756fa03230b7.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/81bc2e42388c76a8cc35.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/81bc2e42388c76a8cc35.json deleted file mode 100644 index b4e831e492a0bd81a12cde05089097ccaaad798f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/81bc2e42388c76a8cc35.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/bccd957d3ee1497242ee.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/bccd957d3ee1497242ee.json deleted file mode 100644 index 93e76193b6c1d3711a16f61faed26036d41f5ebb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/bccd957d3ee1497242ee.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/f11671f04e734a696845.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/f11671f04e734a696845.json deleted file mode 100644 index ee880701b9654c91ea33ac5f52b30b9d1f80e03f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Meta-Llama-3.1-8B/f11671f04e734a696845.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/321051129e499a9d100e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/321051129e499a9d100e.json deleted file mode 100644 index b2d619ec41f2d494df34a9fc6022c46d8f4b865e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/321051129e499a9d100e.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", - "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/59e56e036f276aac27ec.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/59e56e036f276aac27ec.json deleted file mode 100644 index a00e9f9fd54626706dccca740d785856b32fa366..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/59e56e036f276aac27ec.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", - "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/aed3ac4481c88779a26c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/aed3ac4481c88779a26c.json deleted file mode 100644 index 0b2822e3d9c6b74b411de13080dff12b22502d96..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/aed3ac4481c88779a26c.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 48, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", - "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 48, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/bcd80b4d12e05bc045ce.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/bcd80b4d12e05bc045ce.json deleted file mode 100644 index cfa0dea1208bad41e59a4ea0864e9c2831e711cb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/bcd80b4d12e05bc045ce.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 64, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", - "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 64, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/c28ab0c7d33e28708b3c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/c28ab0c7d33e28708b3c.json deleted file mode 100644 index 807e19641aed624b05b86dc3a7a1bc6f89060b95..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/c28ab0c7d33e28708b3c.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", - "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/f0e18f873ce42aaa0b9d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/f0e18f873ce42aaa0b9d.json deleted file mode 100644 index 1a155680f9bd7719a9694e3bd562aaad5142a0ba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/f0e18f873ce42aaa0b9d.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", - "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/fb1938af2d9e7e083207.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/fb1938af2d9e7e083207.json deleted file mode 100644 index 62d3175ebbb29bf91ccd2471098e079b28f637f0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/fb1938af2d9e7e083207.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.1-8B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", - "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/86f34a5b4c3c146ba263.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/86f34a5b4c3c146ba263.json deleted file mode 100644 index 02c56db54e7b21c1fce3f9bbfc967188854ed825..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/86f34a5b4c3c146ba263.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/9fda7263f62daac0b858.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/9fda7263f62daac0b858.json deleted file mode 100644 index c36073f359a74d77dfc73208395c8f9b919aa26b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/9fda7263f62daac0b858.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 5, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/a01ee1e6bc19cfb5be32.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/a01ee1e6bc19cfb5be32.json deleted file mode 100644 index dce03b8705a91f2a460cb4a536eed5b043b335b6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/a01ee1e6bc19cfb5be32.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/f2fa6a9d809db681c502.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/f2fa6a9d809db681c502.json deleted file mode 100644 index b680c8c95d835b796c22606170cf84065c1f1201..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/f2fa6a9d809db681c502.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/291ed174890a84141720.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/291ed174890a84141720.json deleted file mode 100644 index 55266c252a4449a1d7d5427c6e4d949f507caab0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/291ed174890a84141720.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/41afd2fb7f6326db0c2c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/41afd2fb7f6326db0c2c.json deleted file mode 100644 index 8be472e58e9f7e7e1b98bf9f520925eec2455df2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/41afd2fb7f6326db0c2c.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/650d381736581bd669c9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/650d381736581bd669c9.json deleted file mode 100644 index 74d7c6c2ac38c2ea9868b970b2de7c5c314fcd88..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/650d381736581bd669c9.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev7", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/Phi-3-mini-4k-instruct/5cd389b911fd41517716.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/Phi-3-mini-4k-instruct/5cd389b911fd41517716.json deleted file mode 100644 index 50604097869663df0030290ec866005e901fa863..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/Phi-3-mini-4k-instruct/5cd389b911fd41517716.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/Phi-3-mini-4k-instruct/8ec517e0cfd864a5a88a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/Phi-3-mini-4k-instruct/8ec517e0cfd864a5a88a.json deleted file mode 100644 index 6d439a0fc244e0432846bd5eb134982fd3181c08..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/Phi-3-mini-4k-instruct/8ec517e0cfd864a5a88a.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/phi-4/0bceb25c9f91168e7e3c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/phi-4/0bceb25c9f91168e7e3c.json deleted file mode 100644 index 7e32df7323ba33fcb15f1c86f07cd681b60c3e21..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/phi-4/0bceb25c9f91168e7e3c.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/phi-4/5892de0c4962e09fa0f3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/phi-4/5892de0c4962e09fa0f3.json deleted file mode 100644 index 3dfe0a6425749fce7b42917ebb34e36937c83ae7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/phi-4/5892de0c4962e09fa0f3.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/93956a41cd1203f773ff.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/93956a41cd1203f773ff.json deleted file mode 100644 index 81a0158c06fbd297c66a85d87645058c69c0035b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/93956a41cd1203f773ff.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/b6837f9627ec2b9693be.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/b6837f9627ec2b9693be.json deleted file mode 100644 index 845543a70ddef9411724c14cd900011a71fb1d5c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/b6837f9627ec2b9693be.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/dcd70056fa539895bb43.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/dcd70056fa539895bb43.json deleted file mode 100644 index f948edf0b481bf62102ffed432c74180077f7184..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/dcd70056fa539895bb43.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/0ac4cec816400c535e31.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/0ac4cec816400c535e31.json deleted file mode 100644 index cd2f6c97808707cde5e9d35a295c735cd26f0fd1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/0ac4cec816400c535e31.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/13b73d25749de51f93be.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/13b73d25749de51f93be.json deleted file mode 100644 index f31547fe12b96d487b986cd58909ac24f97ee1b7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/13b73d25749de51f93be.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/4e70c222bcc3d1952f3a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/4e70c222bcc3d1952f3a.json deleted file mode 100644 index 05f06b6ee29d23322e2959ed1b3c8aec20e1b5ba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/4e70c222bcc3d1952f3a.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 128, - "tp_degree": 24 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/fae9f820a75603366cca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/fae9f820a75603366cca.json deleted file mode 100644 index cb3eadc5911a214fe5086d17e74b9ffa6624117d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/fae9f820a75603366cca.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-1.5B/5f4b9a0a5175a5ee5097.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-1.5B/5f4b9a0a5175a5ee5097.json deleted file mode 100644 index baf52b112aa0deb01d4d30434efb119b67adcd54..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-1.5B/5f4b9a0a5175a5ee5097.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-1.5B/f4fb00ba5317979c9f56.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-1.5B/f4fb00ba5317979c9f56.json deleted file mode 100644 index 79b3c31b2e6373d2f68ca1e535d5ad34169f9ea4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-1.5B/f4fb00ba5317979c9f56.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-14B/82c88872aab33690e7f0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-14B/82c88872aab33690e7f0.json deleted file mode 100644 index 0654de085ab661a0d3721bcdec4b382301be1bb3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-14B/82c88872aab33690e7f0.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-14B/8a68853670fc84af5062.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-14B/8a68853670fc84af5062.json deleted file mode 100644 index 10414913b692cbeb6def41e04e3fb15c55ecfb66..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-14B/8a68853670fc84af5062.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-32B-Instruct/898176f767d85650b19b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-32B-Instruct/898176f767d85650b19b.json deleted file mode 100644 index ae4b482f2fc50d037d5169dfe56fb4de16490cc7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-32B-Instruct/898176f767d85650b19b.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-32B-Instruct/dcb7b98230e1eb5de94e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-32B-Instruct/dcb7b98230e1eb5de94e.json deleted file mode 100644 index 2629ecd0cd5c7fd5c3f53d3e6f3b7123077a6f73..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-32B-Instruct/dcb7b98230e1eb5de94e.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-72B-Instruct/a5b746b704f4f07a1842.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-72B-Instruct/a5b746b704f4f07a1842.json deleted file mode 100644 index 881d5a0f43d2b524bd7ed2e262fac91acabb2b83..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-72B-Instruct/a5b746b704f4f07a1842.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/19ddcd33e5f674281387.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/19ddcd33e5f674281387.json deleted file mode 100644 index 6a0f47f6cf7e98338e10eda93bd65db6987d57c6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/19ddcd33e5f674281387.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/2aba7f859a48050081b9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/2aba7f859a48050081b9.json deleted file mode 100644 index e614ef8f863f39d9342a621748060a64101cc7a5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/2aba7f859a48050081b9.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/6b31c97326091d004e2b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/6b31c97326091d004e2b.json deleted file mode 100644 index 70d01ed20418cebfdbc99617a9e63a457acbd080..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/6b31c97326091d004e2b.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/be55e6ae7d0f114f01af.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/be55e6ae7d0f114f01af.json deleted file mode 100644 index 045e8ff072ca4348b71e80cdada5e53203690f6c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/be55e6ae7d0f114f01af.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/f41c216be53875203b63.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/f41c216be53875203b63.json deleted file mode 100644 index fe28735bdd2f2a61d8bf16c0cd6664be917738f9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-7B-Instruct/f41c216be53875203b63.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/46289735f583925e4364.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/46289735f583925e4364.json deleted file mode 100644 index 2862e094808e9027a5e01d2b5d1ed26a958de9f9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/46289735f583925e4364.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/f67de7a2d63fffcaaed4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/f67de7a2d63fffcaaed4.json deleted file mode 100644 index 41c09fe1cd3690298fff6c6762923308c5ff3c3b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/f67de7a2d63fffcaaed4.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/435332c9c0f53795ced9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/435332c9c0f53795ced9.json deleted file mode 100644 index d401add47fb989a2a93dfd8c42dd9ab0966a04c6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/435332c9c0f53795ced9.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/de7f62953845a8867133.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/de7f62953845a8867133.json deleted file mode 100644 index 048e8d5b85d2c6f4e40a613b554d5758de96010a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/de7f62953845a8867133.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/b40dffd1dd946c7fbef7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/b40dffd1dd946c7fbef7.json deleted file mode 100644 index ef3bfcdf879499773bea4e7b9ad8e25c710c7929..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/b40dffd1dd946c7fbef7.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/f730a546178347cfc1f2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/f730a546178347cfc1f2.json deleted file mode 100644 index 2a2cdba56161b6c84600914098cdebd39500492d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/f730a546178347cfc1f2.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/345ff82e1c1372955971.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/345ff82e1c1372955971.json deleted file mode 100644 index 92a81ba7d0394af1f1321ae58cd8dfa932adf29b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/345ff82e1c1372955971.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4714696a3dfca901a9b2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4714696a3dfca901a9b2.json deleted file mode 100644 index 03aa25cbeb2f5dc8f2d52924a6399d2bdafc87cc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4714696a3dfca901a9b2.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9ba9d46f62fa47da291e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9ba9d46f62fa47da291e.json deleted file mode 100644 index 3474d17dcf68cc0bb16e369a43ee5d0a8d2fb9c9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9ba9d46f62fa47da291e.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c3ccd6a40d468258b733.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c3ccd6a40d468258b733.json deleted file mode 100644 index 666e3333bbd56194bd2b1152822a36fd34f2e07e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c3ccd6a40d468258b733.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/fa9386172884dcac2164.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/fa9386172884dcac2164.json deleted file mode 100644 index 6f48c7a19169acc82eb8cd5f81c6900b416eb7f9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/fa9386172884dcac2164.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/3ffe53335773a02926b2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/3ffe53335773a02926b2.json deleted file mode 100644 index 72cb5de1ae78e32428fe8339aad8f7be520de1f5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/3ffe53335773a02926b2.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/af69549e13162a34952b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/af69549e13162a34952b.json deleted file mode 100644 index 8d6ac36b691efecd4591e14a09deb5a073a8cd71..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/af69549e13162a34952b.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/be422824e6f4d14a5909.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/be422824e6f4d14a5909.json deleted file mode 100644 index 5e30b9b4c586401ff72ddf79ba17f3df6d9afd50..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/be422824e6f4d14a5909.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev7", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/19ea6642be132232cfad.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/19ea6642be132232cfad.json deleted file mode 100644 index 554c9856ac360a572fdf5d8a476913a74a922204..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/19ea6642be132232cfad.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/4265d6f350c28c936ee3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/4265d6f350c28c936ee3.json deleted file mode 100644 index 56230a15ad73f01492988286569128fb3b36bc2d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/4265d6f350c28c936ee3.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/61c9864c61ccbba9f74c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/61c9864c61ccbba9f74c.json deleted file mode 100644 index a125b5f47157d251a556fb35c616f9bac190acf9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/61c9864c61ccbba9f74c.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/93c2975eebb99ef3b0c7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/93c2975eebb99ef3b0c7.json deleted file mode 100644 index 3f8db0e66282341e8db0a1c96c4353f71b24dfe3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/93c2975eebb99ef3b0c7.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/b2baf933e067510df735.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/b2baf933e067510df735.json deleted file mode 100644 index 6ec8e63d64a61ca056f236dcab030eebed12f797..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-2b-instruct/b2baf933e067510df735.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/31efe84955d57918ebcf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/31efe84955d57918ebcf.json deleted file mode 100644 index e7104aab5e84b2494741cf5a8017dabd26a1c6c5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/31efe84955d57918ebcf.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/62a2d0b4aaf17f61e354.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/62a2d0b4aaf17f61e354.json deleted file mode 100644 index e38d0cc734e728616b4965502b36aa3bdc8cc9a0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/62a2d0b4aaf17f61e354.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/9fa527cd5785253269c4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/9fa527cd5785253269c4.json deleted file mode 100644 index 839f360e5fde7315c767c3f3acb7edc16a2a2203..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/9fa527cd5785253269c4.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/a686163266269a72ea62.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/a686163266269a72ea62.json deleted file mode 100644 index 76015eec2c18a8818ac3e89ec3dc2a08e14a087b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/a686163266269a72ea62.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/fdb056f78cf01fc0bc68.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/fdb056f78cf01fc0bc68.json deleted file mode 100644 index 1381707a860b0ba84e0f74e098a10c13c7362add..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/granite/ibm-granite/granite-3.1-8b-instruct/fdb056f78cf01fc0bc68.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/1704e20adc1686bdd230.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/1704e20adc1686bdd230.json deleted file mode 100644 index 7afcb29d4111a598e92b4ca55b414a2ed3374286..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/1704e20adc1686bdd230.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/53e19a43f1856df8e28f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/53e19a43f1856df8e28f.json deleted file mode 100644 index c2a716d764f349e3fb7d0df8b62bfac0ac493fc2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/53e19a43f1856df8e28f.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/554606e48123988c2d11.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/554606e48123988c2d11.json deleted file mode 100644 index 4db1bee661ed13313c7d3b09bb1c2d561032e900..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/554606e48123988c2d11.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5c9154e52abdaff39739.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5c9154e52abdaff39739.json deleted file mode 100644 index 031a5bd5d1bd584280d659c8d486d7accb99b3b5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5c9154e52abdaff39739.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/74734a7db03453035286.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/74734a7db03453035286.json deleted file mode 100644 index de263f615d2e98896b1a897f4a7d92067bf0d116..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/74734a7db03453035286.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c39b9a02440f566657a0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c39b9a02440f566657a0.json deleted file mode 100644 index cc023ebc3cfa7191d18aca401ff206373ebacee2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c39b9a02440f566657a0.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/d0f200a73b035bf13893.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/d0f200a73b035bf13893.json deleted file mode 100644 index 3f58cd88f61015877d208dff8320b3355bf8b576..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/d0f200a73b035bf13893.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/226b34f74113e0809145.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/226b34f74113e0809145.json deleted file mode 100644 index 04b841f91f144d6baf7289f8bebb4105856fb74d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/226b34f74113e0809145.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 131072, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/6fd6ed7696673c25e3bc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/6fd6ed7696673c25e3bc.json deleted file mode 100644 index a966328b9b6e7df3d1d560c1d8d6a5b37c087930..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/6fd6ed7696673c25e3bc.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/92505f1398020ba9caca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/92505f1398020ba9caca.json deleted file mode 100644 index 9a095b9b4c396db8c9c28871b2c919f7ca525c32..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/92505f1398020ba9caca.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/f9d69ed6dad30058c3f6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/f9d69ed6dad30058c3f6.json deleted file mode 100644 index e92a5e79e5bc6e787869669ed30ddf237ca07613..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/f9d69ed6dad30058c3f6.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/lmsys/vicuna-7b-v1.5/7ea2445abdd9b122e78e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/lmsys/vicuna-7b-v1.5/7ea2445abdd9b122e78e.json deleted file mode 100644 index 03dd5c4c1eeb0310baad62646ec08c9b3b0718d9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/lmsys/vicuna-7b-v1.5/7ea2445abdd9b122e78e.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/lmsys/vicuna-7b-v1.5/9156effa6a32043eedd7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/lmsys/vicuna-7b-v1.5/9156effa6a32043eedd7.json deleted file mode 100644 index d9833b53ec00efd64677678909da6fe0e285b4dd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/lmsys/vicuna-7b-v1.5/9156effa6a32043eedd7.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-2-13b-hf/2a3006c3263fc21f2ece.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-2-13b-hf/2a3006c3263fc21f2ece.json deleted file mode 100644 index 7cb255a83100a4f2a4db472d6feb7430fb814677..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-2-13b-hf/2a3006c3263fc21f2ece.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-2-7b-hf/f87f7c881d9666017d92.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-2-7b-hf/f87f7c881d9666017d92.json deleted file mode 100644 index 599ab3a4d9a90cd2c575c8480abb4c5cf60a26c8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-2-7b-hf/f87f7c881d9666017d92.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.1-70B-Instruct/5d48e6b6990be2c43eb1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.1-70B-Instruct/5d48e6b6990be2c43eb1.json deleted file mode 100644 index daa7be6788cc6c7387ef88b6da366a8f8aa18c2f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.1-70B-Instruct/5d48e6b6990be2c43eb1.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.1-70B-Instruct/fa11807a4c937a56d0b6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.1-70B-Instruct/fa11807a4c937a56d0b6.json deleted file mode 100644 index 9114986805a9e6de156b8d4084ec13c17879e70e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.1-70B-Instruct/fa11807a4c937a56d0b6.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/31f62ec0eaab290b4ae5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/31f62ec0eaab290b4ae5.json deleted file mode 100644 index 2e3c2d4152ed1ea02327fc52a12893202e441eda..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/31f62ec0eaab290b4ae5.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", - "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/3da196f521260e769ca6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/3da196f521260e769ca6.json deleted file mode 100644 index f38e7453723601092cd245634eaa887650a29105..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/3da196f521260e769ca6.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", - "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B/4d597e926b7608780c4e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B/4d597e926b7608780c4e.json deleted file mode 100644 index 623e15a0835aa9387972d542742710070f3e6778..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B/4d597e926b7608780c4e.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-3B/d3d892973e2ba88a76d0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-3B/d3d892973e2ba88a76d0.json deleted file mode 100644 index 27c62bbfff9939c759a8442317f11c1a8f23ee8e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-3B/d3d892973e2ba88a76d0.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3-8B/ee5f10bd1e194e587442.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3-8B/ee5f10bd1e194e587442.json deleted file mode 100644 index 071fdd817c2a4c6ef187699aa806ec5664d9cb86..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3-8B/ee5f10bd1e194e587442.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/c236c1e452818f65af50.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/c236c1e452818f65af50.json deleted file mode 100644 index 316a3208a2bd9baee229839b73f8b00c21e199a3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/c236c1e452818f65af50.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/c4e7d87bd42468196269.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/c4e7d87bd42468196269.json deleted file mode 100644 index cea0bd2f9b67c0a91712000394123da3a84d6ec1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/c4e7d87bd42468196269.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/d2fe71759c094e35b565.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/d2fe71759c094e35b565.json deleted file mode 100644 index 62200927ca5a99f26cff18a0cce8c982f76a7b48..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/d2fe71759c094e35b565.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/dcc3970157fea8cb38ff.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/dcc3970157fea8cb38ff.json deleted file mode 100644 index 762c172926deaedcf229bf1c4cea43f2d28b46ba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/dcc3970157fea8cb38ff.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/ddc9c0529f4b1472d7bb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/ddc9c0529f4b1472d7bb.json deleted file mode 100644 index 6b7f984abc9b0c0a7083dd92bcc70e17ed419264..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/ddc9c0529f4b1472d7bb.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/ec9707b4588c437a95c7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/ec9707b4588c437a95c7.json deleted file mode 100644 index b31a81aac9b5415f50cafa204e9dc6628e5079f2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Meta-Llama-3.1-8B/ec9707b4588c437a95c7.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/princeton-nlp/Sheared-LLaMA-1.3B/16d94be4dea0f81b3f74.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/princeton-nlp/Sheared-LLaMA-1.3B/16d94be4dea0f81b3f74.json deleted file mode 100644 index 923c42fc97d244e949aac06589619908f7b4e539..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/princeton-nlp/Sheared-LLaMA-1.3B/16d94be4dea0f81b3f74.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/princeton-nlp/Sheared-LLaMA-1.3B/cde85d16f1a8ae501740.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/princeton-nlp/Sheared-LLaMA-1.3B/cde85d16f1a8ae501740.json deleted file mode 100644 index d3f932c3fe185850e8e6fe533ae99448382123e2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/princeton-nlp/Sheared-LLaMA-1.3B/cde85d16f1a8ae501740.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/5e3d4766f8d5bbfad660.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/5e3d4766f8d5bbfad660.json deleted file mode 100644 index bd16e8e9aec3529faadc06da6985f4c212d1bd7e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/5e3d4766f8d5bbfad660.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 5, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/73529828aa9ce630a6ca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/73529828aa9ce630a6ca.json deleted file mode 100644 index 02711815524046d82f266d875dc2a732f03cd302..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/73529828aa9ce630a6ca.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f4d3009ec7e739e5ca73.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f4d3009ec7e739e5ca73.json deleted file mode 100644 index 2df82512c87ac793b3df5abbaa5f6017c3c0b5e5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f4d3009ec7e739e5ca73.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0.dev8", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f8f7e906c6b8549f7310.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f8f7e906c6b8549f7310.json deleted file mode 100644 index 004318bc72347ff6f3f976d1c98189cb0c817873..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f8f7e906c6b8549f7310.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/Phi-3-mini-4k-instruct/f0c972d8b339c765f87a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/Phi-3-mini-4k-instruct/f0c972d8b339c765f87a.json deleted file mode 100644 index a45b871653a5e576512b10215a4ceeb9ebc9b531..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/Phi-3-mini-4k-instruct/f0c972d8b339c765f87a.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/Phi-3-mini-4k-instruct/f7bc31b4aa6bdafa07c4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/Phi-3-mini-4k-instruct/f7bc31b4aa6bdafa07c4.json deleted file mode 100644 index ff863f43db1b5b40310e5869ca46e6dc7db24621..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/Phi-3-mini-4k-instruct/f7bc31b4aa6bdafa07c4.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/phi-4/2df2897b4bd2ba998bcf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/phi-4/2df2897b4bd2ba998bcf.json deleted file mode 100644 index 95b74e7780a092e1d635528858ad7edd1e518e99..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/phi-4/2df2897b4bd2ba998bcf.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/phi-4/f7856ad02adeacede4df.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/phi-4/f7856ad02adeacede4df.json deleted file mode 100644 index 939675fa0831d9d25ebe63ac460158b4a83e55a9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/phi3/microsoft/phi-4/f7856ad02adeacede4df.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/230a731a57276d443a13.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/230a731a57276d443a13.json deleted file mode 100644 index 028174c349e2def14ace11dd34ecf3f73ed146a0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/230a731a57276d443a13.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/73595ac6243cfb832200.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/73595ac6243cfb832200.json deleted file mode 100644 index 95d9d5279ccdff340b15b836ce0b4d5ea8ef6973..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/73595ac6243cfb832200.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/e40d69bbe96e92f41c00.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/e40d69bbe96e92f41c00.json deleted file mode 100644 index 33b88c1e2cdede9501a5a6b24a446b44694205fc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/e40d69bbe96e92f41c00.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-1.5B/838b0cc77e2625755e3f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-1.5B/838b0cc77e2625755e3f.json deleted file mode 100644 index e57f85751fe8abd6345d96dfcc1a8386a2a972a9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-1.5B/838b0cc77e2625755e3f.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-1.5B/d97c609f740416df5f39.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-1.5B/d97c609f740416df5f39.json deleted file mode 100644 index 10064f355c08cdd0a0229899c19af10618662adb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-1.5B/d97c609f740416df5f39.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-14B/cf434240980f5c31b0f4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-14B/cf434240980f5c31b0f4.json deleted file mode 100644 index f1341ae59f657d620f365cc82d8d0a069db258ad..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-14B/cf434240980f5c31b0f4.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-14B/d3686c4f626713f2bbed.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-14B/d3686c4f626713f2bbed.json deleted file mode 100644 index 8b2c9d1a024d60d1b73964b6d331b1741d986da2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-14B/d3686c4f626713f2bbed.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-32B-Instruct/745bd890a25c64f6b490.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-32B-Instruct/745bd890a25c64f6b490.json deleted file mode 100644 index 364bb4fe879fff4e4e6e5928e14891baca0ff2e7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-32B-Instruct/745bd890a25c64f6b490.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-32B-Instruct/b6848474a354bbd2587b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-32B-Instruct/b6848474a354bbd2587b.json deleted file mode 100644 index e7e5812234e981cb7b692eeb2b6885edd69407ea..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-32B-Instruct/b6848474a354bbd2587b.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-72B-Instruct/52941b4626fe868bdd2f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-72B-Instruct/52941b4626fe868bdd2f.json deleted file mode 100644 index 57c3321d5a8e8dec5855584687f7c914730dc4da..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-72B-Instruct/52941b4626fe868bdd2f.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/6c2f61fbf44f0b35b721.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/6c2f61fbf44f0b35b721.json deleted file mode 100644 index 7f5ff78c85468a1b3577b393bdb381c086f2ce4b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/6c2f61fbf44f0b35b721.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/94fcaf8da51a3c92291a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/94fcaf8da51a3c92291a.json deleted file mode 100644 index 46ba55c21f7a3869383d62305818a979541a9917..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/94fcaf8da51a3c92291a.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/a57f8d35abbbf15b911d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/a57f8d35abbbf15b911d.json deleted file mode 100644 index 73cf0e4cb863e98ee8a31b621381de117a32ca24..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/a57f8d35abbbf15b911d.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/b65c0ef09fd4c5c7288b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/b65c0ef09fd4c5c7288b.json deleted file mode 100644 index 366cddf53fa9276e33ef674ce6f616d0458d9263..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/b65c0ef09fd4c5c7288b.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/f8167ef685f208b49a7c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/f8167ef685f208b49a7c.json deleted file mode 100644 index 1a4cb51cb4bb1812e6ce411416cdf10f7aaebd50..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-7B-Instruct/f8167ef685f208b49a7c.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/07026d43387458f071cc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/07026d43387458f071cc.json deleted file mode 100644 index 7c37c51efca232a97ee6f3a803db93fed8b663c6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/07026d43387458f071cc.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/be095474323de6419234.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/be095474323de6419234.json deleted file mode 100644 index 39e97a17542c4704943da9f6d4305190600ca2cb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/be095474323de6419234.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/27ed21c1197030fb610a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/27ed21c1197030fb610a.json deleted file mode 100644 index 9dcc0dd35c496007dd4feff04e42632008f4833e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/27ed21c1197030fb610a.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/649fc42ec78d0ae4771d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/649fc42ec78d0ae4771d.json deleted file mode 100644 index c062b52016bd7d3765a1dbaf504cfcab10c61bdc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/649fc42ec78d0ae4771d.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/0c7fa765f3f22a519044.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/0c7fa765f3f22a519044.json deleted file mode 100644 index 1befe4352461c8d666c6bbc3a68cdbc5fa6da908..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/0c7fa765f3f22a519044.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/a0454e134adf108990eb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/a0454e134adf108990eb.json deleted file mode 100644 index 0e5cd948058d7d3fadb8007e5c3aed02c19f0287..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/a0454e134adf108990eb.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/38e093f6b21a273e8c05.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/38e093f6b21a273e8c05.json deleted file mode 100644 index b8e51971dc025ac8d1863e1c7aac12f061476d93..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/38e093f6b21a273e8c05.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/416ab00bb7bd661fca82.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/416ab00bb7bd661fca82.json deleted file mode 100644 index eb08b184b9395226ec83d807472355b2f74b8fdc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/416ab00bb7bd661fca82.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4339b88c603a465c7306.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4339b88c603a465c7306.json deleted file mode 100644 index d08d2fbe66815fdf3e5af07a77acdd38123d1830..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4339b88c603a465c7306.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/78efe86cd446f5ef95e6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/78efe86cd446f5ef95e6.json deleted file mode 100644 index ec91849f7e2a5aec8fd7a3b76d5a4aecf1956b2d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/78efe86cd446f5ef95e6.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/bee9258eb84cf5899bd7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/bee9258eb84cf5899bd7.json deleted file mode 100644 index 40b18339f1f8f7b5fe8d7a7b2686a1e12c391182..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/bee9258eb84cf5899bd7.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0.dev8", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/05c9cb13b974d4bfbb70.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/05c9cb13b974d4bfbb70.json deleted file mode 100644 index ef86a8ee1da12eb67729306ca3e4d59bd16b627f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/05c9cb13b974d4bfbb70.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/182f570e395a76ac2d2c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/182f570e395a76ac2d2c.json deleted file mode 100644 index 37451622c88415313cc5b2f1e323877c998acc3c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/182f570e395a76ac2d2c.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/7def1dfbe4c4fd9f87fd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/7def1dfbe4c4fd9f87fd.json deleted file mode 100644 index 05bf64337625dabccb1ac434b56f688a7dffe6f4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/7def1dfbe4c4fd9f87fd.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/2a0e52aa09e43341909e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/2a0e52aa09e43341909e.json deleted file mode 100644 index bc4e1d36e41b6d04f4e302a430e9d0b80e2679e3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/2a0e52aa09e43341909e.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/6d2608eed316f305c5bb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/6d2608eed316f305c5bb.json deleted file mode 100644 index 507a8f6513a2114d3b90767731e35611b6480c7a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/6d2608eed316f305c5bb.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/8e93e8f118272809c2f5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/8e93e8f118272809c2f5.json deleted file mode 100644 index 804bf83cafe995ca5d0a9032f56922dc9ce8293d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/8e93e8f118272809c2f5.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/99ac93dceb3c73b2fa4b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/99ac93dceb3c73b2fa4b.json deleted file mode 100644 index 938f9779e80c2155ae29a875ecc2595d3ead34e6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/99ac93dceb3c73b2fa4b.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/ee761b3178e03abb295f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/ee761b3178e03abb295f.json deleted file mode 100644 index 1e6727c6981cf28aa4eeead54b81bf464618435a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-2b-instruct/ee761b3178e03abb295f.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/1220320796f33d31afdd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/1220320796f33d31afdd.json deleted file mode 100644 index 7877da8c365c43362141631cb616347820ef4d6b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/1220320796f33d31afdd.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/200d343f2e1db055e745.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/200d343f2e1db055e745.json deleted file mode 100644 index 20952cf744d5b578c00b5d87b2a2529a604e502e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/200d343f2e1db055e745.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/28ccfb47b7885800b798.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/28ccfb47b7885800b798.json deleted file mode 100644 index 51694254bb71543e07daf950c1a53a5921518e9e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/28ccfb47b7885800b798.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/2d26393d136ab33db873.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/2d26393d136ab33db873.json deleted file mode 100644 index de39031c12d4327f685ebad0ad073aaed3536d26..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/2d26393d136ab33db873.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/adf8e5cb16ba08d75040.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/adf8e5cb16ba08d75040.json deleted file mode 100644 index 1e36232de5c53b720e2fb540023656916715199d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/granite/ibm-granite/granite-3.1-8b-instruct/adf8e5cb16ba08d75040.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8c7bafb7e52cef9e8f70.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8c7bafb7e52cef9e8f70.json deleted file mode 100644 index d7e16a4136be0c2c99baf9e91c4be187103475e4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8c7bafb7e52cef9e8f70.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/133899fba3df8249a54b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/133899fba3df8249a54b.json deleted file mode 100644 index 359dba480ef11b5929e7c4782f3c16e062ee9305..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/133899fba3df8249a54b.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/516bf42135f1a627e22e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/516bf42135f1a627e22e.json deleted file mode 100644 index 7e447ee89cc13469ca67d972b7fdb79a596402bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/516bf42135f1a627e22e.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/91e261ffa104e2ec4ed0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/91e261ffa104e2ec4ed0.json deleted file mode 100644 index b0a8779ebe2c53dca00892a7069b451d3587f5ac..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/91e261ffa104e2ec4ed0.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/aadf2390428897192f1a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/aadf2390428897192f1a.json deleted file mode 100644 index afbef527c0aaaf9f0cad48af29500156b18e0d72..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/aadf2390428897192f1a.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c35d5063e58f4f2a7425.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c35d5063e58f4f2a7425.json deleted file mode 100644 index 3a07c0f77bdad6abbe850977cb8e30960157ae11..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c35d5063e58f4f2a7425.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/da18c06e5f6fef178fc9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/da18c06e5f6fef178fc9.json deleted file mode 100644 index 707b2c8ba34081b0d694938dffd8e39d908bd5db..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/da18c06e5f6fef178fc9.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/081c145e831213ab718b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/081c145e831213ab718b.json deleted file mode 100644 index 7fe79c7e65da7d22ba842889052c453dd413b49a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/081c145e831213ab718b.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/087436195a7333857a9f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/087436195a7333857a9f.json deleted file mode 100644 index 4caec6d84ab4065206865cb35990a7171b6184cf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/087436195a7333857a9f.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/2308b99b68d27ecfdc41.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/2308b99b68d27ecfdc41.json deleted file mode 100644 index ff7776b8353ef5219b701815c63d2503f720904b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/2308b99b68d27ecfdc41.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/2c27b201142fed60ee5b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/2c27b201142fed60ee5b.json deleted file mode 100644 index e796f6e0d3838cab1abc9eb3e08f2e0096ad7dd0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/2c27b201142fed60ee5b.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/304cd41ac7d9c2303bd3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/304cd41ac7d9c2303bd3.json deleted file mode 100644 index df65401a77e7399f56d6813ca1fa48a2cf7d5a08..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/304cd41ac7d9c2303bd3.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/7f5cf6b207ab7e9d6384.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/7f5cf6b207ab7e9d6384.json deleted file mode 100644 index 4367543e5e7e98e21aff9c34320b88393331e8ac..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/llamafactory/tiny-random-Llama-3/7f5cf6b207ab7e9d6384.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/lmsys/vicuna-7b-v1.5/f0017d9547c3403e39f9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/lmsys/vicuna-7b-v1.5/f0017d9547c3403e39f9.json deleted file mode 100644 index f052420e070afdbbc36dc464b1723250a350863c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/lmsys/vicuna-7b-v1.5/f0017d9547c3403e39f9.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/lmsys/vicuna-7b-v1.5/f427d288d89336c5c068.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/lmsys/vicuna-7b-v1.5/f427d288d89336c5c068.json deleted file mode 100644 index 622b1fd59969352372f20edaafc21e08514541e8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/lmsys/vicuna-7b-v1.5/f427d288d89336c5c068.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-2-13b-hf/ec8241a7e734d28748dd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-2-13b-hf/ec8241a7e734d28748dd.json deleted file mode 100644 index e3cd82d6550bfbf80f4d05278713f5a0ac6cdffa..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-2-13b-hf/ec8241a7e734d28748dd.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-2-7b-hf/f0791906af23f0d10fc1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-2-7b-hf/f0791906af23f0d10fc1.json deleted file mode 100644 index d392a1cfb7b5157811d83c226447af3611bca975..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-2-7b-hf/f0791906af23f0d10fc1.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.1-70B-Instruct/ddb1d9d686e7b54edb38.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.1-70B-Instruct/ddb1d9d686e7b54edb38.json deleted file mode 100644 index 9d530f525a35d8ffca6ffad746ecf7bd9bb48761..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.1-70B-Instruct/ddb1d9d686e7b54edb38.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.1-70B-Instruct/e7328e6b072f15f59608.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.1-70B-Instruct/e7328e6b072f15f59608.json deleted file mode 100644 index d80995fe5db769f923cbab58d2a3db3440f746e9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.1-70B-Instruct/e7328e6b072f15f59608.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B-Instruct/e50437e463c4cd2fb8eb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B-Instruct/e50437e463c4cd2fb8eb.json deleted file mode 100644 index 2edcdf26f9a27207f116fe0e6410d67805c97a0d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B-Instruct/e50437e463c4cd2fb8eb.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", - "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B/97be41d446bdc9506e7c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B/97be41d446bdc9506e7c.json deleted file mode 100644 index 7c4f8cde36d663f6e539485c140116fe49b731f4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B/97be41d446bdc9506e7c.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-3B/a30d00f76ebcd5572fc2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-3B/a30d00f76ebcd5572fc2.json deleted file mode 100644 index 4788200480ef30843ff941041e2ef1d28d6b1d18..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-3B/a30d00f76ebcd5572fc2.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3-8B/253438b0c01af15d203b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3-8B/253438b0c01af15d203b.json deleted file mode 100644 index fa7907dde3c7bb59c6a90a47b1092a642ee8918e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3-8B/253438b0c01af15d203b.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/0ce4011031a7ed5c172b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/0ce4011031a7ed5c172b.json deleted file mode 100644 index e66d0c94c95eb45cefad64987133cb08e681a10f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/0ce4011031a7ed5c172b.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/4281efd85632a5789ecf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/4281efd85632a5789ecf.json deleted file mode 100644 index ac6e31074a744e9ba0ba46d82616ebf9ee425b05..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/4281efd85632a5789ecf.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/821893b5920c91702441.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/821893b5920c91702441.json deleted file mode 100644 index c0e549044a2fedf45e72a036408fcef3c915dcc2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/821893b5920c91702441.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/a32a3fd45e8abcf0b783.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/a32a3fd45e8abcf0b783.json deleted file mode 100644 index 9ebc10e301ad933acf1be741dec20621aeb0a5e9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/a32a3fd45e8abcf0b783.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/abbcdc74baa6d4561c9e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/abbcdc74baa6d4561c9e.json deleted file mode 100644 index 90f37f232d1281b13fc2ccf1d0d410f955e80493..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/abbcdc74baa6d4561c9e.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/bd9563537b47cee8383e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/bd9563537b47cee8383e.json deleted file mode 100644 index 9fbbdeec6d9b65bb63cec474310aeaa8af01d384..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Meta-Llama-3.1-8B/bd9563537b47cee8383e.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/princeton-nlp/Sheared-LLaMA-1.3B/1d0e302d5c5a8ce527dc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/princeton-nlp/Sheared-LLaMA-1.3B/1d0e302d5c5a8ce527dc.json deleted file mode 100644 index d98c42935a8c00cb5f13a3882a1c552cb7fa464d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/princeton-nlp/Sheared-LLaMA-1.3B/1d0e302d5c5a8ce527dc.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/princeton-nlp/Sheared-LLaMA-1.3B/2571866de9c0f0dc6eff.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/princeton-nlp/Sheared-LLaMA-1.3B/2571866de9c0f0dc6eff.json deleted file mode 100644 index 70fa3789d8a52d2689ca21dbebd203b3b938215a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/princeton-nlp/Sheared-LLaMA-1.3B/2571866de9c0f0dc6eff.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/4c9bbde8fe9d338394c9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/4c9bbde8fe9d338394c9.json deleted file mode 100644 index a05dc5c89377210d858153cbc96b1a5f0919ccb3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/4c9bbde8fe9d338394c9.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/5e144164d13014b24277.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/5e144164d13014b24277.json deleted file mode 100644 index 8b22c9a1b4604171901cc270a00f5d5e2dbf897d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/5e144164d13014b24277.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/60969fb571a659b30503.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/60969fb571a659b30503.json deleted file mode 100644 index 638e16e2ec88da9a4c7331cea7f09ef4f941a06b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/60969fb571a659b30503.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/a150e7b231e1cdaec17c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/a150e7b231e1cdaec17c.json deleted file mode 100644 index eb807a16ef7a031db30c77e73b7037797aa76b87..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/a150e7b231e1cdaec17c.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/c8d149891550d3793bb9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/c8d149891550d3793bb9.json deleted file mode 100644 index f90588ec94319065b90d7e9fb00655fb562f8053..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/c8d149891550d3793bb9.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 5, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/mixtral/dacorvo/Mixtral-tiny/4e14b2bd4d311cf1902f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/mixtral/dacorvo/Mixtral-tiny/4e14b2bd4d311cf1902f.json deleted file mode 100644 index 07b9249aafe639683bf3be7b242194599deace1b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/mixtral/dacorvo/Mixtral-tiny/4e14b2bd4d311cf1902f.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/mixtral/dacorvo/Mixtral-tiny/7d7a8f9e25ad161672e4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/mixtral/dacorvo/Mixtral-tiny/7d7a8f9e25ad161672e4.json deleted file mode 100644 index e42d98c056a12e406af62bddea005914d7bd6c0e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/mixtral/dacorvo/Mixtral-tiny/7d7a8f9e25ad161672e4.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/mixtral/dacorvo/Mixtral-tiny/fa8340534c295e77e3b6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/mixtral/dacorvo/Mixtral-tiny/fa8340534c295e77e3b6.json deleted file mode 100644 index cc3d3a922c52bfd4fc0ccee6cb8012f0aba97d56..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/mixtral/dacorvo/Mixtral-tiny/fa8340534c295e77e3b6.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/Phi-3-mini-4k-instruct/4f60282e2fff2e9277cc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/Phi-3-mini-4k-instruct/4f60282e2fff2e9277cc.json deleted file mode 100644 index 3930a6ab910cf3158cf0feede1e1fed8c15a0686..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/Phi-3-mini-4k-instruct/4f60282e2fff2e9277cc.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/Phi-3-mini-4k-instruct/de66585753a3d1baf4cb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/Phi-3-mini-4k-instruct/de66585753a3d1baf4cb.json deleted file mode 100644 index 578774ac28f6d5105220fee2583e7a0071046d17..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/Phi-3-mini-4k-instruct/de66585753a3d1baf4cb.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/phi-4/a5b7ae259342101d37e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/phi-4/a5b7ae259342101d37e8.json deleted file mode 100644 index 865840d5f04dc0ca43d6974a378dca2ab91a61cc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/phi-4/a5b7ae259342101d37e8.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/phi-4/c3087af55a0d92140fbb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/phi-4/c3087af55a0d92140fbb.json deleted file mode 100644 index b427cc9abe429dfe544027d362b64ca58047ddc3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/microsoft/phi-4/c3087af55a0d92140fbb.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/yujiepan/phi-4-tiny-random/732f0a646f3fead04cc6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/yujiepan/phi-4-tiny-random/732f0a646f3fead04cc6.json deleted file mode 100644 index 3c5a515bbd195226d0332b6eb5ad516b047de3c7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/yujiepan/phi-4-tiny-random/732f0a646f3fead04cc6.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/yujiepan/phi-4-tiny-random/dd2dd68e4186f608717a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/yujiepan/phi-4-tiny-random/dd2dd68e4186f608717a.json deleted file mode 100644 index 159032fed683628da60f0d9764c1d9b666f0c4db..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/yujiepan/phi-4-tiny-random/dd2dd68e4186f608717a.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/yujiepan/phi-4-tiny-random/ebcb161fd1f8a2eaaf0a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/yujiepan/phi-4-tiny-random/ebcb161fd1f8a2eaaf0a.json deleted file mode 100644 index fb74d72ac7d483826fa0e067f571b3493600cd09..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/phi3/yujiepan/phi-4-tiny-random/ebcb161fd1f8a2eaaf0a.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-0.5B/1454002290e6730d8902.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-0.5B/1454002290e6730d8902.json deleted file mode 100644 index d3d6c383b345f55d8ab8f3c82a639094e0cabc71..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-0.5B/1454002290e6730d8902.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-0.5B/27665e694943c1798237.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-0.5B/27665e694943c1798237.json deleted file mode 100644 index 726ff4f71c66f987dafe617a0088aea2244ebff4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-0.5B/27665e694943c1798237.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-0.5B/b00bcfa95e9e0de17a81.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-0.5B/b00bcfa95e9e0de17a81.json deleted file mode 100644 index 9a80ab21e47b40f00be2bd125548a24722bcf058..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-0.5B/b00bcfa95e9e0de17a81.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-1.5B/5c44e5afd9464ac8c687.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-1.5B/5c44e5afd9464ac8c687.json deleted file mode 100644 index 458c16d96f2502fa8db33f6d462206a16dca2e1b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-1.5B/5c44e5afd9464ac8c687.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-1.5B/7ed2f4a7357718f9b517.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-1.5B/7ed2f4a7357718f9b517.json deleted file mode 100644 index 60e4f8fe88bedd0f2010d9c25e49448c677deb54..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-1.5B/7ed2f4a7357718f9b517.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-14B/59d90cc19027c65f345a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-14B/59d90cc19027c65f345a.json deleted file mode 100644 index fb3884723637b993a648c5b641b8b34638fcabd5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-14B/59d90cc19027c65f345a.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-14B/d5732cdb1a4594da9f91.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-14B/d5732cdb1a4594da9f91.json deleted file mode 100644 index 561eb10fa67c37f7439843f6edffa93ba612d437..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-14B/d5732cdb1a4594da9f91.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-32B-Instruct/bfb5fa0d6043a14404fb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-32B-Instruct/bfb5fa0d6043a14404fb.json deleted file mode 100644 index 203094e65bbbb0023bfd15fceecfd2cceb486efa..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-32B-Instruct/bfb5fa0d6043a14404fb.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-32B-Instruct/c5e6db3c69bc1feb50c0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-32B-Instruct/c5e6db3c69bc1feb50c0.json deleted file mode 100644 index adfa7f69b3f34a24298e4c9076029fb50135973e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-32B-Instruct/c5e6db3c69bc1feb50c0.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-72B-Instruct/c88d1d1f43f1793c82e6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-72B-Instruct/c88d1d1f43f1793c82e6.json deleted file mode 100644 index b821df69fb09f90733938c39311f688160505292..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-72B-Instruct/c88d1d1f43f1793c82e6.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/29362be89cfa4cbb85dc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/29362be89cfa4cbb85dc.json deleted file mode 100644 index b8494cfe05f8ab22b7682f7ebb76c449cdd91f8a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/29362be89cfa4cbb85dc.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/5ed78dbf7aa7cde45721.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/5ed78dbf7aa7cde45721.json deleted file mode 100644 index 8bcd250c737eadb14dc0a740251fe2e453df97e0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/5ed78dbf7aa7cde45721.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/838b71acc12fa594e7fa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/838b71acc12fa594e7fa.json deleted file mode 100644 index 9862abfe77d7ecc83069c65a31fcd2fc331c33f9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/838b71acc12fa594e7fa.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/9359d431f70d5c4d9d39.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/9359d431f70d5c4d9d39.json deleted file mode 100644 index 48343f396118cec20c854e14da26a8c54db89cdd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/9359d431f70d5c4d9d39.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/d7069320fac6aa1ed42b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/d7069320fac6aa1ed42b.json deleted file mode 100644 index 5a7f1dfff52f13f7b17054b32bf62bf5a6d03d76..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/Qwen/Qwen2.5-7B-Instruct/d7069320fac6aa1ed42b.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/05afb9220c679dee800e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/05afb9220c679dee800e.json deleted file mode 100644 index 1a462552a990dee4be9787845629a2358bd76e7a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/05afb9220c679dee800e.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/4a09872fc2606f71dc0d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/4a09872fc2606f71dc0d.json deleted file mode 100644 index 53c324154c7dee95263a7fbc1ff95d2ea03ed407..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/4a09872fc2606f71dc0d.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/e5afba0ba48b3e351ebf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/e5afba0ba48b3e351ebf.json deleted file mode 100644 index 36badd9d696765eb40eac02b1888a68d7bcba3f2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/e5afba0ba48b3e351ebf.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/fabd41afa386830bece9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/fabd41afa386830bece9.json deleted file mode 100644 index 31b9ee138f2cbdbe2ebed2bd0266a99bcb1d9637..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/fabd41afa386830bece9.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/9c58b9acec05317c6720.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/9c58b9acec05317c6720.json deleted file mode 100644 index 7bb29d6e8203f9390e3b22382d9d2f2e4df4e44a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/9c58b9acec05317c6720.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/9eeaa9f2ea3a06d39c1b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/9eeaa9f2ea3a06d39c1b.json deleted file mode 100644 index 6381bf7a2e1a00fed63b8e6f5c5c88b863b24408..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/9eeaa9f2ea3a06d39c1b.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/7ef809f5bdec2dd4c5ed.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/7ef809f5bdec2dd4c5ed.json deleted file mode 100644 index f1acb7f96942036679e7a5e8bc7fe0bd19b33409..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/7ef809f5bdec2dd4c5ed.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/ad1ee63bddd9a821768d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/ad1ee63bddd9a821768d.json deleted file mode 100644 index 7bff9c8d7bc40ea3fe071e71733fa089d9eca886..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/ad1ee63bddd9a821768d.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/bb58bb96fc901d2ebf1a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/bb58bb96fc901d2ebf1a.json deleted file mode 100644 index 94d485995b040bba6b60f484297b436e666260e9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/bb58bb96fc901d2ebf1a.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c500268b6b46104a6a5b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c500268b6b46104a6a5b.json deleted file mode 100644 index 8b9ab97aa7d63b443ec9f1bb48283c59ceeba20a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c500268b6b46104a6a5b.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/f5ec3608408aa4a684e5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/f5ec3608408aa4a684e5.json deleted file mode 100644 index 063274bb8e4dfbea014ce73dd3dfca62e09c6418..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/f5ec3608408aa4a684e5.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/yujiepan/qwen2.5-128k-tiny-random/7823ff8dffd276c75ce4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/yujiepan/qwen2.5-128k-tiny-random/7823ff8dffd276c75ce4.json deleted file mode 100644 index 257811965ae23d2e5d1f2a14a37873ffc88d7a4f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/yujiepan/qwen2.5-128k-tiny-random/7823ff8dffd276c75ce4.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8e67fd7c8f8e393dc42.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8e67fd7c8f8e393dc42.json deleted file mode 100644 index 5d1ce000b3b074f70f537624342ae76de5dff14f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8e67fd7c8f8e393dc42.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/yujiepan/qwen2.5-128k-tiny-random/f245d59e6049a5b7842a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/yujiepan/qwen2.5-128k-tiny-random/f245d59e6049a5b7842a.json deleted file mode 100644 index 05b5ff6949a78beb28f565fc19c0671720156271..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/qwen2/yujiepan/qwen2.5-128k-tiny-random/f245d59e6049a5b7842a.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.2.0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json deleted file mode 100644 index 3e167a4933e519119cab99364a00f802616e9ac2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "Jingya/pixart_sigma_pipe_xl_2_512_ms", - "_task": null, - "text_encoder": { - "architectures": [ - "T5EncoderModel" - ], - "classifier_dropout": 0.0, - "d_ff": 10240, - "d_kv": 64, - "d_model": 4096, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 120, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_decoder_layers": 24, - "num_heads": 64, - "num_layers": 24, - "output_past": true, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32128 - }, - "transformer": { - "_class_name": "PixArtTransformer2DModel", - "activation_fn": "gelu-approximate", - "attention_bias": true, - "attention_head_dim": 72, - "attention_type": "default", - "caption_channels": 4096, - "cross_attention_dim": 1152, - "double_self_attention": false, - "dropout": 0.0, - "in_channels": 4, - "interpolation_scale": 1, - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_encoder_hidden_size": 4096, - "static_height": 64, - "static_num_channels": 4, - "static_patch_size": 2, - "static_sequence_length": 120, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_elementwise_affine": false, - "norm_eps": 1e-06, - "norm_num_groups": 32, - "norm_type": "ada_norm_single", - "num_attention_heads": 16, - "num_embeds_ada_norm": 1000, - "num_layers": 28, - "num_vector_embeds": null, - "only_cross_attention": false, - "out_channels": 8, - "patch_size": 2, - "upcast_attention": false, - "use_additional_conditions": null, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json deleted file mode 100644 index e7c9160f8f01fd8dae0ecee77a5051710472b9ec..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "PixArt-alpha/PixArt-XL-2-512x512", - "_task": null, - "text_encoder": { - "architectures": [ - "T5EncoderModel" - ], - "classifier_dropout": 0.0, - "d_ff": 10240, - "d_kv": 64, - "d_model": 4096, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 120, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_decoder_layers": 24, - "num_heads": 64, - "num_layers": 24, - "output_past": true, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32128 - }, - "transformer": { - "_class_name": "PixArtTransformer2DModel", - "activation_fn": "gelu-approximate", - "attention_bias": true, - "attention_head_dim": 72, - "attention_type": "default", - "caption_channels": 4096, - "cross_attention_dim": 1152, - "double_self_attention": false, - "dropout": 0.0, - "in_channels": 4, - "interpolation_scale": null, - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_encoder_hidden_size": 4096, - "static_height": 64, - "static_num_channels": 4, - "static_patch_size": 2, - "static_sequence_length": 120, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_elementwise_affine": false, - "norm_eps": 1e-06, - "norm_num_groups": 32, - "norm_type": "ada_norm_single", - "num_attention_heads": 16, - "num_embeds_ada_norm": 1000, - "num_layers": 28, - "num_vector_embeds": null, - "only_cross_attention": false, - "out_channels": 8, - "patch_size": 2, - "upcast_attention": false, - "use_additional_conditions": null, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/1db509ab22d1011d860d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/1db509ab22d1011d860d.json deleted file mode 100644 index b07b54dbcf50ea1c1833872e654073d32bc33fe9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/1db509ab22d1011d860d.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/2db43e1d3bb2d09fa086.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/2db43e1d3bb2d09fa086.json deleted file mode 100644 index f93b5d4cf4c6a197fcd746228be97db45640a858..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/2db43e1d3bb2d09fa086.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/77b9e2ef38f4a3f95214.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/77b9e2ef38f4a3f95214.json deleted file mode 100644 index 96835cb798db9d8187ea519821d09208d79fdcf8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/77b9e2ef38f4a3f95214.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/867d6fc94f3a50e82fba.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/867d6fc94f3a50e82fba.json deleted file mode 100644 index 39ff01c8331524ccb98c08822cfa4f4e2e2343e5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/867d6fc94f3a50e82fba.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/869cb2d02c1441a29462.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/869cb2d02c1441a29462.json deleted file mode 100644 index 3ce88dfec2e281804b547857d5fedc69000401e1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-2b-instruct/869cb2d02c1441a29462.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/38de94d6871612c21c8d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/38de94d6871612c21c8d.json deleted file mode 100644 index d444caff14f58a7f881f667beb94756a9e4e81b9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/38de94d6871612c21c8d.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/4211f2800f92376eb5a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/4211f2800f92376eb5a8.json deleted file mode 100644 index 549e17ac0a0c3b62efad82e985854f9aeb73a79f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/4211f2800f92376eb5a8.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/4bf19fca5cd68e87c1af.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/4bf19fca5cd68e87c1af.json deleted file mode 100644 index f0fc157244c90b1abc129cdaad8a2890f1c4cc3c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/4bf19fca5cd68e87c1af.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/b8ef4d3f8b1dd59f4d6f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/b8ef4d3f8b1dd59f4d6f.json deleted file mode 100644 index 1753bf625ff837e8aee4bc45fb8db17cb3d5cf70..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/b8ef4d3f8b1dd59f4d6f.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/bc6155076bf7591830fe.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/bc6155076bf7591830fe.json deleted file mode 100644 index 6649202f6f326818439f232ccae9913b1c7e722c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/granite/ibm-granite/granite-3.1-8b-instruct/bc6155076bf7591830fe.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/153699eb9f28ca79e0a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/153699eb9f28ca79e0a6.json deleted file mode 100644 index d54cbeddf90b2f3f90f7435840930fb0eb5a9206..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/153699eb9f28ca79e0a6.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/06aa4df2ce8e860d18d5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/06aa4df2ce8e860d18d5.json deleted file mode 100644 index ea0c96f00881c14d6585216bf36b5a6618edb902..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/06aa4df2ce8e860d18d5.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/261ad94f85448d178670.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/261ad94f85448d178670.json deleted file mode 100644 index bde843b13b01b70e71faa08c460c4dfac0cf9416..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/261ad94f85448d178670.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/3f61504fe5a479a435db.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/3f61504fe5a479a435db.json deleted file mode 100644 index 6911fced6b2a7a2a3aee61da4b7c61e67cfbab1f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/3f61504fe5a479a435db.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/60740400b6ec9d850faf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/60740400b6ec9d850faf.json deleted file mode 100644 index 8b29ed14b22fd3705ab50813c3e28001f375c48d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/60740400b6ec9d850faf.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/88f331469018813cf497.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/88f331469018813cf497.json deleted file mode 100644 index 354791c5adc433eb8716f542f4f5fd544442bc6d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/88f331469018813cf497.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/911be7a33286c9a29b63.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/911be7a33286c9a29b63.json deleted file mode 100644 index e95893e3ede36516813ecac1e21c95594a2324fc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/911be7a33286c9a29b63.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e8fd98b6ee228c960696.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e8fd98b6ee228c960696.json deleted file mode 100644 index 52bfb1434b0338e7d2438d6b844a664467cb6732..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e8fd98b6ee228c960696.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/lmsys/vicuna-7b-v1.5/46d7fcf8f4dd5e383580.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/lmsys/vicuna-7b-v1.5/46d7fcf8f4dd5e383580.json deleted file mode 100644 index 2417bfdaf303f44e587de23353e8d44b35314c6b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/lmsys/vicuna-7b-v1.5/46d7fcf8f4dd5e383580.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/lmsys/vicuna-7b-v1.5/51d9075579231063ab09.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/lmsys/vicuna-7b-v1.5/51d9075579231063ab09.json deleted file mode 100644 index c0d05b31ccad5da6ae00791e04a8accbdc7e441f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/lmsys/vicuna-7b-v1.5/51d9075579231063ab09.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-2-13b-hf/0a3164bd53d8e61b50f6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-2-13b-hf/0a3164bd53d8e61b50f6.json deleted file mode 100644 index ddef6bbfb660a49dad346037cbbc0ed2454c1f6b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-2-13b-hf/0a3164bd53d8e61b50f6.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-2-7b-hf/8bfe37c9e34d24b17701.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-2-7b-hf/8bfe37c9e34d24b17701.json deleted file mode 100644 index a99ce311c417e87ddd992d01831d9d6fd6845147..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-2-7b-hf/8bfe37c9e34d24b17701.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.1-70B-Instruct/02b5b4054fc604324dd7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.1-70B-Instruct/02b5b4054fc604324dd7.json deleted file mode 100644 index 4664f92a47afe010c0ef79a3ae1b3e9201bc07fc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.1-70B-Instruct/02b5b4054fc604324dd7.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.1-70B-Instruct/9704b5793df84a4e4e6f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.1-70B-Instruct/9704b5793df84a4e4e6f.json deleted file mode 100644 index 0891ebd77454ef8705e4d55f60c8bfbf8b84721a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.1-70B-Instruct/9704b5793df84a4e4e6f.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.2-1B/37ad08f2ee146887ae72.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.2-1B/37ad08f2ee146887ae72.json deleted file mode 100644 index e6994f7cf40ce9529738ffe449122b0257a8691e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.2-1B/37ad08f2ee146887ae72.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.2-3B/2c9f58891135d28a8335.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.2-3B/2c9f58891135d28a8335.json deleted file mode 100644 index d437fecd391cac6945acaeea3a5268dda83cab47..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Llama-3.2-3B/2c9f58891135d28a8335.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3-8B/83eba598fc4e860e7380.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3-8B/83eba598fc4e860e7380.json deleted file mode 100644 index 83759714b988c1b602ff882fd30e57ccad23b8b9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3-8B/83eba598fc4e860e7380.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/33eca17751b95c45da69.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/33eca17751b95c45da69.json deleted file mode 100644 index 839a8dcfd2e4d73816f878ec0ffcf305ef6a46fb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/33eca17751b95c45da69.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/5a34901fdce834e694f9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/5a34901fdce834e694f9.json deleted file mode 100644 index e76c732ad6d0c8fc4a355ecaf44345494da19725..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/5a34901fdce834e694f9.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/5b5663fd89c90a7de5dc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/5b5663fd89c90a7de5dc.json deleted file mode 100644 index f40389aacc02ed4a098972b1fbf1a97c6cde1340..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/5b5663fd89c90a7de5dc.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/6c42461f3b71532d2488.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/6c42461f3b71532d2488.json deleted file mode 100644 index 8c51b2932af43862b9a8af11b2d3824f81228e2c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/6c42461f3b71532d2488.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/8311d5dfabe6701e30bc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/8311d5dfabe6701e30bc.json deleted file mode 100644 index 3514a99faec65b64971af09f632b86bb6db575b7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/8311d5dfabe6701e30bc.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/890091ca23a1406af6d3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/890091ca23a1406af6d3.json deleted file mode 100644 index eba340d7044285766df44519ef2013d5068f775a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/890091ca23a1406af6d3.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/a2e85146ecab24504763.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/a2e85146ecab24504763.json deleted file mode 100644 index 4ff87eb7b1a2087009cefe0b5216205e7ae5cc6f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/meta-llama/Meta-Llama-3.1-8B/a2e85146ecab24504763.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/princeton-nlp/Sheared-LLaMA-1.3B/5a0d41f5c03fc4c954ba.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/princeton-nlp/Sheared-LLaMA-1.3B/5a0d41f5c03fc4c954ba.json deleted file mode 100644 index e1eaae791ffab0202a4666cc99e7fd9ca4dd2bad..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/princeton-nlp/Sheared-LLaMA-1.3B/5a0d41f5c03fc4c954ba.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/princeton-nlp/Sheared-LLaMA-1.3B/86b177c2cd98bbd85a75.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/princeton-nlp/Sheared-LLaMA-1.3B/86b177c2cd98bbd85a75.json deleted file mode 100644 index 771dd1b5a4e3a9006084e88e684cc1131d11e1eb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/llama/princeton-nlp/Sheared-LLaMA-1.3B/86b177c2cd98bbd85a75.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/Phi-3-mini-4k-instruct/1af857548f8340164126.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/Phi-3-mini-4k-instruct/1af857548f8340164126.json deleted file mode 100644 index 79d5739687059eeff99b9bc4cc620de79d4c2272..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/Phi-3-mini-4k-instruct/1af857548f8340164126.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/Phi-3-mini-4k-instruct/293673d033097c5051ea.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/Phi-3-mini-4k-instruct/293673d033097c5051ea.json deleted file mode 100644 index c868e298bde58e103642685ea4e4439e3aa9ee6c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/Phi-3-mini-4k-instruct/293673d033097c5051ea.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/phi-4/311a663c7072ce70f546.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/phi-4/311a663c7072ce70f546.json deleted file mode 100644 index 27504fad1f5d412aacc44e807e5ed064c1449a3e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/phi-4/311a663c7072ce70f546.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/phi-4/82f683b838c600de93dd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/phi-4/82f683b838c600de93dd.json deleted file mode 100644 index 2de490f7026dececafe0769e77d0b8b5ee442d62..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/phi3/microsoft/phi-4/82f683b838c600de93dd.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-0.5B/8814d0ee3a2434448102.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-0.5B/8814d0ee3a2434448102.json deleted file mode 100644 index 57f6c48cb111b35cbb5b75e1e61e60621b55ad74..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-0.5B/8814d0ee3a2434448102.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-0.5B/cb6a7bce6b0eb3d0b005.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-0.5B/cb6a7bce6b0eb3d0b005.json deleted file mode 100644 index 9c4db99ca439f00b75b6b98a0ffec73872f5687f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-0.5B/cb6a7bce6b0eb3d0b005.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-1.5B/905972687882744db14b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-1.5B/905972687882744db14b.json deleted file mode 100644 index 5cc9d20a4e41379dfb17f6c2b60e232a550960e0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-1.5B/905972687882744db14b.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-1.5B/c8854adc0e0a91767706.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-1.5B/c8854adc0e0a91767706.json deleted file mode 100644 index 05baf99f04a059d83d6db1edbf02955ad738c9e4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-1.5B/c8854adc0e0a91767706.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-14B/96a9219582e5e8c22c60.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-14B/96a9219582e5e8c22c60.json deleted file mode 100644 index e5ec531b6b26c7a6a4ade7570a23e0adb74a90dc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-14B/96a9219582e5e8c22c60.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-14B/bb422fd0cfa791fb5f84.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-14B/bb422fd0cfa791fb5f84.json deleted file mode 100644 index 51381e94cc095a5ec583824a71b395f349a9ebc1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-14B/bb422fd0cfa791fb5f84.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-32B-Instruct/b21b27134150841a9638.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-32B-Instruct/b21b27134150841a9638.json deleted file mode 100644 index 079fe242ce192f0352e8e37767c8a2665dba9f9d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-32B-Instruct/b21b27134150841a9638.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-32B-Instruct/cb51a3cd341e88c19dbc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-32B-Instruct/cb51a3cd341e88c19dbc.json deleted file mode 100644 index e8440d252a6e2ab0468e96ae4cb06242cdf4e71a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-32B-Instruct/cb51a3cd341e88c19dbc.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-72B-Instruct/1282efe89e580fd97709.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-72B-Instruct/1282efe89e580fd97709.json deleted file mode 100644 index a850a19887df35b9a3ad2707905abc4497e1deb9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-72B-Instruct/1282efe89e580fd97709.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/31437b9c7709c391ce36.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/31437b9c7709c391ce36.json deleted file mode 100644 index 324fd21758bb0e2018a98df3b5c9b1f1a829870d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/31437b9c7709c391ce36.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/676cea7335e6d3df6035.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/676cea7335e6d3df6035.json deleted file mode 100644 index fee074f4d01542655b9e578b58d00e292a51ebaf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/676cea7335e6d3df6035.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/a09a11cb97a881a4a2dd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/a09a11cb97a881a4a2dd.json deleted file mode 100644 index a600483976418f21ae9df8932393b8bcffe6c447..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/a09a11cb97a881a4a2dd.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/a67d267022368754538b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/a67d267022368754538b.json deleted file mode 100644 index f370dffd00da8c0a19787cbbbc2edbab3fab0412..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/a67d267022368754538b.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/c53dc805300cc4859a61.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/c53dc805300cc4859a61.json deleted file mode 100644 index e3102e66fc44ac9895e845774aa05c03cc0e0e3a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/Qwen/Qwen2.5-7B-Instruct/c53dc805300cc4859a61.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/b4eb98b2fa3c50594822.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/b4eb98b2fa3c50594822.json deleted file mode 100644 index d097ccbf1c809f8a765bf60efb63489cf84a5c7a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/b4eb98b2fa3c50594822.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/c51a3fbb1f9d8d0d9abe.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/c51a3fbb1f9d8d0d9abe.json deleted file mode 100644 index 2f9758a4bc8e6740ecb196727a2f48424a9567ad..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/c51a3fbb1f9d8d0d9abe.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/73ad00bdd44b37880c1b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/73ad00bdd44b37880c1b.json deleted file mode 100644 index 1e06121ae4a9671bff8de34d4288b2e73f81c6e5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/73ad00bdd44b37880c1b.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/ac6228ae6951596bb74a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/ac6228ae6951596bb74a.json deleted file mode 100644 index 3eeac7ce23d7375d3c6510f79e1b3659a40d012c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/ac6228ae6951596bb74a.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/4fb1ccec15a8c5952e74.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/4fb1ccec15a8c5952e74.json deleted file mode 100644 index 18f0a0077d766fb26717e7f97845456bf33d5d4c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/4fb1ccec15a8c5952e74.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/c9a42fa1e5c07781d311.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/c9a42fa1e5c07781d311.json deleted file mode 100644 index 30c0b90db1228a228507b09f65bab94c7f3f9823..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/c9a42fa1e5c07781d311.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/487f25e4f7a9d4fd66e9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/487f25e4f7a9d4fd66e9.json deleted file mode 100644 index 8f6b6c8c9c15cce3a7eac4b94285ab3e03dab91d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/487f25e4f7a9d4fd66e9.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/62639bafdeaa2305f69b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/62639bafdeaa2305f69b.json deleted file mode 100644 index dd37e2e2a7b0be8354dd1918064cb84ce87a9119..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/62639bafdeaa2305f69b.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/80b943538f10efdcec63.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/80b943538f10efdcec63.json deleted file mode 100644 index a9aeb3f47629777b4efb53ca60abf6460a31a87f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/80b943538f10efdcec63.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a5b8abdfd0acabb4c751.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a5b8abdfd0acabb4c751.json deleted file mode 100644 index edeb39789dbe488546b6de9e74ffe41b0baf6e21..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a5b8abdfd0acabb4c751.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/fb49aa756ab69ca51d74.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/fb49aa756ab69ca51d74.json deleted file mode 100644 index 2381f0b6a428340286474069f6b09aaff9a1e1f6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/fb49aa756ab69ca51d74.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json deleted file mode 100644 index 66630fe558766028b5435553aa329a14e7a13241..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json deleted file mode 100644 index 3e167a4933e519119cab99364a00f802616e9ac2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "Jingya/pixart_sigma_pipe_xl_2_512_ms", - "_task": null, - "text_encoder": { - "architectures": [ - "T5EncoderModel" - ], - "classifier_dropout": 0.0, - "d_ff": 10240, - "d_kv": 64, - "d_model": 4096, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 120, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_decoder_layers": 24, - "num_heads": 64, - "num_layers": 24, - "output_past": true, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32128 - }, - "transformer": { - "_class_name": "PixArtTransformer2DModel", - "activation_fn": "gelu-approximate", - "attention_bias": true, - "attention_head_dim": 72, - "attention_type": "default", - "caption_channels": 4096, - "cross_attention_dim": 1152, - "double_self_attention": false, - "dropout": 0.0, - "in_channels": 4, - "interpolation_scale": 1, - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_encoder_hidden_size": 4096, - "static_height": 64, - "static_num_channels": 4, - "static_patch_size": 2, - "static_sequence_length": 120, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_elementwise_affine": false, - "norm_eps": 1e-06, - "norm_num_groups": 32, - "norm_type": "ada_norm_single", - "num_attention_heads": 16, - "num_embeds_ada_norm": 1000, - "num_layers": 28, - "num_vector_embeds": null, - "only_cross_attention": false, - "out_channels": 8, - "patch_size": 2, - "upcast_attention": false, - "use_additional_conditions": null, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json deleted file mode 100644 index e7c9160f8f01fd8dae0ecee77a5051710472b9ec..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "PixArt-alpha/PixArt-XL-2-512x512", - "_task": null, - "text_encoder": { - "architectures": [ - "T5EncoderModel" - ], - "classifier_dropout": 0.0, - "d_ff": 10240, - "d_kv": 64, - "d_model": 4096, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 120, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_decoder_layers": 24, - "num_heads": 64, - "num_layers": 24, - "output_past": true, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32128 - }, - "transformer": { - "_class_name": "PixArtTransformer2DModel", - "activation_fn": "gelu-approximate", - "attention_bias": true, - "attention_head_dim": 72, - "attention_type": "default", - "caption_channels": 4096, - "cross_attention_dim": 1152, - "double_self_attention": false, - "dropout": 0.0, - "in_channels": 4, - "interpolation_scale": null, - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_encoder_hidden_size": 4096, - "static_height": 64, - "static_num_channels": 4, - "static_patch_size": 2, - "static_sequence_length": 120, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_elementwise_affine": false, - "norm_eps": 1e-06, - "norm_num_groups": 32, - "norm_type": "ada_norm_single", - "num_attention_heads": 16, - "num_embeds_ada_norm": 1000, - "num_layers": 28, - "num_vector_embeds": null, - "only_cross_attention": false, - "out_channels": 8, - "patch_size": 2, - "upcast_attention": false, - "use_additional_conditions": null, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/412f7499962bd8cf2ab0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/412f7499962bd8cf2ab0.json deleted file mode 100644 index 82bd84b269b86651720822747ae65fbaf6be19f8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/412f7499962bd8cf2ab0.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/5a20be00767587a6c88d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/5a20be00767587a6c88d.json deleted file mode 100644 index 9b4bb5a891f9dec53ed02d914c05ef8ca48ec992..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/5a20be00767587a6c88d.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/5c6f23e5e18865c76460.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/5c6f23e5e18865c76460.json deleted file mode 100644 index 8ce8d4bffb6b3571f04868fe47689431daf28dc7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/5c6f23e5e18865c76460.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/72b9e86c6f89db5c69ed.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/72b9e86c6f89db5c69ed.json deleted file mode 100644 index 9e7c61f775cd2e4f4b0260bbb4a0e9e81f211dcb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/72b9e86c6f89db5c69ed.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/ad869f4091c60bb870e5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/ad869f4091c60bb870e5.json deleted file mode 100644 index 2c333d175f159befc4a5915de2375ed3d14e58ce..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-2b-instruct/ad869f4091c60bb870e5.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/16ca9236c787cd9420a3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/16ca9236c787cd9420a3.json deleted file mode 100644 index a8ebd108d0cf963605e4978064b3c8ef507189ba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/16ca9236c787cd9420a3.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/35e16591a1809c932167.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/35e16591a1809c932167.json deleted file mode 100644 index 2aebdc8d43bfad9c97db9cac16ddddeaed8a3df3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/35e16591a1809c932167.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/b15551586829135adcba.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/b15551586829135adcba.json deleted file mode 100644 index 26ee5c1b2cea35e0b7f331e70561af3508af0970..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/b15551586829135adcba.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/e625bd8bd310d29b9903.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/e625bd8bd310d29b9903.json deleted file mode 100644 index 5b21d4014f5d7553d7e452a50e0d6c323797d849..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/e625bd8bd310d29b9903.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/eff4e27bf399d45984e9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/eff4e27bf399d45984e9.json deleted file mode 100644 index b10935182e346ec02c9c5df795039589267da719..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/granite/ibm-granite/granite-3.1-8b-instruct/eff4e27bf399d45984e9.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/3f356404c2fdd24a6092.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/3f356404c2fdd24a6092.json deleted file mode 100644 index 292db466007ef020d9f824e0cbef10760015320c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/3f356404c2fdd24a6092.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/3fbee63b7426c54a8bef.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/3fbee63b7426c54a8bef.json deleted file mode 100644 index b9fd468d00a8602d9dd0a162e7fb597ef041a21f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/3fbee63b7426c54a8bef.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/42f562fe02b392489027.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/42f562fe02b392489027.json deleted file mode 100644 index d4b6cc5cceb14d383c2778fb9e81bf76bf432d47..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/42f562fe02b392489027.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8a6bed981ec9074d2d2f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8a6bed981ec9074d2d2f.json deleted file mode 100644 index b5c5e4d8eddc54e4173ef92ef0c9455a67c98e70..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8a6bed981ec9074d2d2f.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/cbaad17f9a80269151e5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/cbaad17f9a80269151e5.json deleted file mode 100644 index 7e530b7ccc4193ade0e3db04524f18bf7e7b82c7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/cbaad17f9a80269151e5.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/ec05d1a7fe804917e5a9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/ec05d1a7fe804917e5a9.json deleted file mode 100644 index b5391a38be53346b559fd7600b940e629be162df..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/ec05d1a7fe804917e5a9.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/ef510c39c7d57de00523.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/ef510c39c7d57de00523.json deleted file mode 100644 index 66cf65746cddb32eae7cfbb84dcf39630cde1c95..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/ef510c39c7d57de00523.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/fe8a111c02cc04ece279.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/fe8a111c02cc04ece279.json deleted file mode 100644 index f81fb4bbed93d64d8b7bd1df1ef04741ce752813..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/fe8a111c02cc04ece279.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/lmsys/vicuna-7b-v1.5/4224d0f76dcc28cb9a21.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/lmsys/vicuna-7b-v1.5/4224d0f76dcc28cb9a21.json deleted file mode 100644 index ca3d2ceb8087a46846e282b1a5bf8665747a4782..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/lmsys/vicuna-7b-v1.5/4224d0f76dcc28cb9a21.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/lmsys/vicuna-7b-v1.5/a6f373e8aa07caf1ea83.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/lmsys/vicuna-7b-v1.5/a6f373e8aa07caf1ea83.json deleted file mode 100644 index 4140dd338ce08f85714373de75dd7249251d7df7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/lmsys/vicuna-7b-v1.5/a6f373e8aa07caf1ea83.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-2-13b-hf/b1ca2072f8fefb2e68df.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-2-13b-hf/b1ca2072f8fefb2e68df.json deleted file mode 100644 index 91fe701100ba8f5b13d7fe8bc16677cd20327d29..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-2-13b-hf/b1ca2072f8fefb2e68df.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-2-7b-hf/2e05e5313574955a3f8b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-2-7b-hf/2e05e5313574955a3f8b.json deleted file mode 100644 index 982f272890832bb6ce43ff6401f36f6681d98f30..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-2-7b-hf/2e05e5313574955a3f8b.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.1-70B-Instruct/a21fc03d59b41247d5de.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.1-70B-Instruct/a21fc03d59b41247d5de.json deleted file mode 100644 index 83ef96c9e9a89fc62da7f48b1bdef94208b50d21..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.1-70B-Instruct/a21fc03d59b41247d5de.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.1-70B-Instruct/f2300d13c146435d39e4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.1-70B-Instruct/f2300d13c146435d39e4.json deleted file mode 100644 index 623be86740ae9708d2dae088296838c8df21bc7e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.1-70B-Instruct/f2300d13c146435d39e4.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.2-1B/afd171c0f551c061ae54.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.2-1B/afd171c0f551c061ae54.json deleted file mode 100644 index 210be67022c3eb64241affa93fa090215089f631..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.2-1B/afd171c0f551c061ae54.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.2-3B/53499b4c2d2bffdb6b24.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.2-3B/53499b4c2d2bffdb6b24.json deleted file mode 100644 index 8307ac0931d3ee2693905ebf8cd5e8b716279f5e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Llama-3.2-3B/53499b4c2d2bffdb6b24.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3-8B/7a1137b177a566a66c22.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3-8B/7a1137b177a566a66c22.json deleted file mode 100644 index b34ad1f47274f6de3911423d19f35cf86c823071..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3-8B/7a1137b177a566a66c22.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/022930d2b5d25659c7ab.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/022930d2b5d25659c7ab.json deleted file mode 100644 index 3dd5c6b3b633780b0bafdf38c78f3298a6f550b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/022930d2b5d25659c7ab.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/26a5531289f68109fb91.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/26a5531289f68109fb91.json deleted file mode 100644 index 3dd420531c72074314283a075ac6258e097d2d36..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/26a5531289f68109fb91.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/41370e3164a37f697e84.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/41370e3164a37f697e84.json deleted file mode 100644 index c0b5267c970ac0a4ad1946e2e6b6844f02a3939c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/41370e3164a37f697e84.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/99f2c09ceee0c8d9fef4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/99f2c09ceee0c8d9fef4.json deleted file mode 100644 index b229c881e8b2ddcf3218951cb48eaf66f64bd14e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/99f2c09ceee0c8d9fef4.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/c589190dfb611bf05787.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/c589190dfb611bf05787.json deleted file mode 100644 index 37d24e75e96e43859ee173d860925148e43e89aa..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/c589190dfb611bf05787.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/f24ad65ffb5250ba8a40.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/f24ad65ffb5250ba8a40.json deleted file mode 100644 index 3be5e6c929fadd98d920f1cf1b46d12a0e9b913d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/f24ad65ffb5250ba8a40.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/fc3294ba408dd3c18e73.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/fc3294ba408dd3c18e73.json deleted file mode 100644 index fa056f5aa2f32d715d850f7b1a865af2989a71ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/meta-llama/Meta-Llama-3.1-8B/fc3294ba408dd3c18e73.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/princeton-nlp/Sheared-LLaMA-1.3B/8b6580778d10cb2df1cc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/princeton-nlp/Sheared-LLaMA-1.3B/8b6580778d10cb2df1cc.json deleted file mode 100644 index 436a2d258cd3a51af64096dafe1f755f30d4931b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/princeton-nlp/Sheared-LLaMA-1.3B/8b6580778d10cb2df1cc.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/princeton-nlp/Sheared-LLaMA-1.3B/bb14e60c6d8860f16909.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/princeton-nlp/Sheared-LLaMA-1.3B/bb14e60c6d8860f16909.json deleted file mode 100644 index 574f621264c74c5ba0a450600d17a126b70cd79f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/llama/princeton-nlp/Sheared-LLaMA-1.3B/bb14e60c6d8860f16909.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/Phi-3-mini-4k-instruct/4d3e15aa937de5d88ebc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/Phi-3-mini-4k-instruct/4d3e15aa937de5d88ebc.json deleted file mode 100644 index e682c007c45bb91d8497aa0b983edb0eee7919b5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/Phi-3-mini-4k-instruct/4d3e15aa937de5d88ebc.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/Phi-3-mini-4k-instruct/e2c643b56d6647497d5d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/Phi-3-mini-4k-instruct/e2c643b56d6647497d5d.json deleted file mode 100644 index 4295fcb3f4242d5dcec670739c24243d72f365b5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/Phi-3-mini-4k-instruct/e2c643b56d6647497d5d.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/phi-4/959b1cffd59777b86d7e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/phi-4/959b1cffd59777b86d7e.json deleted file mode 100644 index e9444c1c743b015e193e5c8834d30d7dfa605408..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/phi-4/959b1cffd59777b86d7e.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/phi-4/ce3f43a6fd27f1b9dcd0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/phi-4/ce3f43a6fd27f1b9dcd0.json deleted file mode 100644 index 3d96c61e0507bac72bb17b8f8a0446a8cc75f0ef..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/phi3/microsoft/phi-4/ce3f43a6fd27f1b9dcd0.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-0.5B/3e0cc260a45d576253bf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-0.5B/3e0cc260a45d576253bf.json deleted file mode 100644 index 1f902d14a2413f4cafce59aea557904aa9b02393..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-0.5B/3e0cc260a45d576253bf.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-0.5B/bf7c82eb3d0222b8d35b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-0.5B/bf7c82eb3d0222b8d35b.json deleted file mode 100644 index f7c03d16803b503f3cda045d6f803e03ce6a853c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-0.5B/bf7c82eb3d0222b8d35b.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-1.5B/761390518cd81fa7378f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-1.5B/761390518cd81fa7378f.json deleted file mode 100644 index a21533984756226393651297822bf09afc8af98a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-1.5B/761390518cd81fa7378f.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-1.5B/be3cb65bc00a924da4ad.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-1.5B/be3cb65bc00a924da4ad.json deleted file mode 100644 index 2560266c5c439f3e295fc5e1d7d0ad8b1e8db822..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-1.5B/be3cb65bc00a924da4ad.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-14B/984622ac4a11aff982ba.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-14B/984622ac4a11aff982ba.json deleted file mode 100644 index 677dbb1101b3bd2ea1b5c6950951df4b9e62d783..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-14B/984622ac4a11aff982ba.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-14B/ecec6ddfa278f11558d3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-14B/ecec6ddfa278f11558d3.json deleted file mode 100644 index c9d2dd5e690ca06c2fd664b7352cbc3f5f49cf26..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-14B/ecec6ddfa278f11558d3.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-32B-Instruct/2db07efca7a1ad75a203.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-32B-Instruct/2db07efca7a1ad75a203.json deleted file mode 100644 index b10379c918c6bce604caad73e62781e1e11d6d2b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-32B-Instruct/2db07efca7a1ad75a203.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-32B-Instruct/d38b8ed1c8092faac004.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-32B-Instruct/d38b8ed1c8092faac004.json deleted file mode 100644 index a398b099238a93fa207a4aae1ac4552ec255ad9e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-32B-Instruct/d38b8ed1c8092faac004.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-72B-Instruct/4e55c2e2f32c7f6d0b82.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-72B-Instruct/4e55c2e2f32c7f6d0b82.json deleted file mode 100644 index f5faa768da066926d44f0bb11c7513af5de2f324..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-72B-Instruct/4e55c2e2f32c7f6d0b82.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/781539e1c459b6d0ae3c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/781539e1c459b6d0ae3c.json deleted file mode 100644 index 6d263c9eef320688111aa01a8f7535f569e41080..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/781539e1c459b6d0ae3c.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/ae0c68ebfda01b781ce4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/ae0c68ebfda01b781ce4.json deleted file mode 100644 index e61e43571be906b32933cb1c3aace37d7d8f78ce..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/ae0c68ebfda01b781ce4.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/ba883191f8c4afa0f0a5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/ba883191f8c4afa0f0a5.json deleted file mode 100644 index 4cc3aa7f2355c07bae4dbe3d6aa3dddf33d9f467..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/ba883191f8c4afa0f0a5.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/bf401552d498e434ae11.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/bf401552d498e434ae11.json deleted file mode 100644 index dc666de2487c9aa81d997b6f23b1b8b4dfae58ed..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/Qwen/Qwen2.5-7B-Instruct/bf401552d498e434ae11.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/62b2416e0fcfb1f3ebc9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/62b2416e0fcfb1f3ebc9.json deleted file mode 100644 index 11c463d350649634d04519bc21fc70468f421276..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/62b2416e0fcfb1f3ebc9.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/932aca62c94604ad1740.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/932aca62c94604ad1740.json deleted file mode 100644 index 31526f962e66a8032d9d12f6d4688a91d1f41cbe..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/932aca62c94604ad1740.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/17d025d979a35586bf35.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/17d025d979a35586bf35.json deleted file mode 100644 index 9064df4c68faada9bb0c4a2736bc817ecc38a569..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/17d025d979a35586bf35.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/7a6869bae52eff242462.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/7a6869bae52eff242462.json deleted file mode 100644 index 2176dcb95507ac9c2bc94e7dc6846cd8dcaf1e14..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/7a6869bae52eff242462.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/48a209d7f8c620e494a3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/48a209d7f8c620e494a3.json deleted file mode 100644 index a3debe2727d2697aed3f46069a9c45c6b6172a09..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/48a209d7f8c620e494a3.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/a286bb83ff5f2b07eae6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/a286bb83ff5f2b07eae6.json deleted file mode 100644 index fcdf317d4f5f126110f2944bd8e90c0cb09d6874..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/a286bb83ff5f2b07eae6.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/34ba142418c834ff40d3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/34ba142418c834ff40d3.json deleted file mode 100644 index 1667680ebc6edeb5facd3e95cc5aef8e88262b04..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/34ba142418c834ff40d3.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/68d34087564abd26644c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/68d34087564abd26644c.json deleted file mode 100644 index f5103857409006b97d7d51ed84222505c7aa452b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/68d34087564abd26644c.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6fca926d14a95dffea2f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6fca926d14a95dffea2f.json deleted file mode 100644 index 6dce7e3ada029639cde6005c3ab3105b1da73bb1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6fca926d14a95dffea2f.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9e337d5506206920413c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9e337d5506206920413c.json deleted file mode 100644 index def65722ac2598891d08b9261ce3314aeb2e42bb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9e337d5506206920413c.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/eba1df548effd8e75d99.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/eba1df548effd8e75d99.json deleted file mode 100644 index 95a1b1d9accadd71b82685d15d616496013fa493..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/eba1df548effd8e75d99.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.2.2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json deleted file mode 100644 index 66630fe558766028b5435553aa329a14e7a13241..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/222b5da19ecb8337f174.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/222b5da19ecb8337f174.json deleted file mode 100644 index f8eda8eb738dd9cb15ea43b643b0c02cf77a500a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/222b5da19ecb8337f174.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/7a8e6beda4aa4e7660fe.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/7a8e6beda4aa4e7660fe.json deleted file mode 100644 index 69b6b300178d43e91a9387273204fdf5e27b1d27..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/7a8e6beda4aa4e7660fe.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/9f1af819f866dee08080.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/9f1af819f866dee08080.json deleted file mode 100644 index d5b4812a8b5ce8fce708eb0d6540cb5bb197de1a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/9f1af819f866dee08080.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/e8f136a47c64f17bb7d7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/e8f136a47c64f17bb7d7.json deleted file mode 100644 index 0285c7582ef5af6e3df62fa86d813744de84a048..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/e8f136a47c64f17bb7d7.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/f1a3aa0dd3c3485f2447.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/f1a3aa0dd3c3485f2447.json deleted file mode 100644 index 9676866f3d3514a94173ce8544c887dd446ebbc8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-2b-instruct/f1a3aa0dd3c3485f2447.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/22068443ee1190645ff9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/22068443ee1190645ff9.json deleted file mode 100644 index 384f7aa233d7f4691ba2ea3589001a9d0f016214..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/22068443ee1190645ff9.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/320433f9e92526a66bf9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/320433f9e92526a66bf9.json deleted file mode 100644 index d56267279fe867ff32217b7df5d35de4b225960b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/320433f9e92526a66bf9.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/6d040bc84538e4915028.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/6d040bc84538e4915028.json deleted file mode 100644 index 219e0b2b88da36b8a292bc74b1841e8af5a8976e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/6d040bc84538e4915028.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/c6cb4832defee7e6be1a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/c6cb4832defee7e6be1a.json deleted file mode 100644 index fe2e7c9241bf3073d7ebe2ca532a1a160fa8e963..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/c6cb4832defee7e6be1a.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/e4d37513db345dfb1528.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/e4d37513db345dfb1528.json deleted file mode 100644 index 7217bc9f14424fa19110c204f3d6978806ce40ff..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/granite/ibm-granite/granite-3.1-8b-instruct/e4d37513db345dfb1528.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8871846e0e6c075245ad.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8871846e0e6c075245ad.json deleted file mode 100644 index a70ccff25f090b900cd626252b58842aa2bb66aa..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8871846e0e6c075245ad.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/1bd96334e39ef6b9e94d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/1bd96334e39ef6b9e94d.json deleted file mode 100644 index 9234fbb600cccb587825d0939b6d5a760ac1d55b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/1bd96334e39ef6b9e94d.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/a20fec503dbeb1c3cc2c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/a20fec503dbeb1c3cc2c.json deleted file mode 100644 index ff463501d621a2a15cc53d413da3339948ad2cb5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/a20fec503dbeb1c3cc2c.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/ad78e6467fba676e8a3a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/ad78e6467fba676e8a3a.json deleted file mode 100644 index db42b2fc64190bce1980a9e983072d5706e97ebf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/ad78e6467fba676e8a3a.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/017745b45cc644bbe8ce.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/017745b45cc644bbe8ce.json deleted file mode 100644 index 7e62a4f287df6b9ed54ce87f9b47938ccf5d1e01..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/017745b45cc644bbe8ce.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8099d8ca34c5fccac2d1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8099d8ca34c5fccac2d1.json deleted file mode 100644 index 7b2feb6007eec1e50a961b23bb10d44693c49169..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8099d8ca34c5fccac2d1.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/a3268a6ca462d219eed1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/a3268a6ca462d219eed1.json deleted file mode 100644 index a86787f99a4f69f4d6cf979db48049aeef17f299..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/a3268a6ca462d219eed1.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c3b2fd06c99d80a52ba5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c3b2fd06c99d80a52ba5.json deleted file mode 100644 index 0a7ebdf32520fb38c288cd63cdfd74c9e3e47ffc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c3b2fd06c99d80a52ba5.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/dfe93977621c29e6ca83.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/dfe93977621c29e6ca83.json deleted file mode 100644 index 9a0c7328dfc53b4c748e39e9f1b76137e56a31ce..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/dfe93977621c29e6ca83.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/f4c21971051a2f5c5a6c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/f4c21971051a2f5c5a6c.json deleted file mode 100644 index e14d52321dbbcd5ffdf63eaa8dee0d30dada7397..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/f4c21971051a2f5c5a6c.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/2a5585a9b282ac0f03cb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/2a5585a9b282ac0f03cb.json deleted file mode 100644 index a7fa50b82233e87631a088d5b6f03c9a71b2de79..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/2a5585a9b282ac0f03cb.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/31b5ed5507a49ff23e88.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/31b5ed5507a49ff23e88.json deleted file mode 100644 index f0adc5f89462bf5c9456edee56c130191923341b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/31b5ed5507a49ff23e88.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/439224868ff2d187153d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/439224868ff2d187153d.json deleted file mode 100644 index b7311de7b540e660fea05525e4757263a50ff52e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/439224868ff2d187153d.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 131072, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/abebee697588b013b2de.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/abebee697588b013b2de.json deleted file mode 100644 index ca6186b6a919a7ea252eb7cd87bea0a46bfcae2b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/abebee697588b013b2de.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/lmsys/vicuna-7b-v1.5/adaf8d0f80a96f2eb69f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/lmsys/vicuna-7b-v1.5/adaf8d0f80a96f2eb69f.json deleted file mode 100644 index caaa548736d0fbfb310dd9614122b574868a66f2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/lmsys/vicuna-7b-v1.5/adaf8d0f80a96f2eb69f.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/lmsys/vicuna-7b-v1.5/bd3cbc0d22fb321f3359.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/lmsys/vicuna-7b-v1.5/bd3cbc0d22fb321f3359.json deleted file mode 100644 index ff1293156efccfddfdfcdcac6d2a53498c747269..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/lmsys/vicuna-7b-v1.5/bd3cbc0d22fb321f3359.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-2-13b-hf/a8a504470d07b7910c4a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-2-13b-hf/a8a504470d07b7910c4a.json deleted file mode 100644 index cc07d1d7fbb6375c2ae038adc6b3790d428ab2e5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-2-13b-hf/a8a504470d07b7910c4a.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-2-7b-hf/6e5be9b1e54031995cce.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-2-7b-hf/6e5be9b1e54031995cce.json deleted file mode 100644 index 75367a5543e3d6c2df6a8e9473f47a823dc53732..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-2-7b-hf/6e5be9b1e54031995cce.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.1-70B-Instruct/5751810239b24179a7ff.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.1-70B-Instruct/5751810239b24179a7ff.json deleted file mode 100644 index 9c3cb325ecff02e1c7efd583399118ed13250745..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.1-70B-Instruct/5751810239b24179a7ff.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.1-70B-Instruct/fd331a0b1e076dc3557f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.1-70B-Instruct/fd331a0b1e076dc3557f.json deleted file mode 100644 index dc978a735787c44ce24a4adf3af1b090fad34133..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.1-70B-Instruct/fd331a0b1e076dc3557f.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.2-1B/be98b52355eeef605845.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.2-1B/be98b52355eeef605845.json deleted file mode 100644 index 1960711799a4d9759ecd23cc501f7ec524213b83..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.2-1B/be98b52355eeef605845.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.2-3B/e390a040c043e800b639.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.2-3B/e390a040c043e800b639.json deleted file mode 100644 index da6fd3a6010a421dc5c5335c4c74fdd04a468b51..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Llama-3.2-3B/e390a040c043e800b639.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3-8B/f573843869aa037c3fed.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3-8B/f573843869aa037c3fed.json deleted file mode 100644 index 4a3550f2119a1d0988c9c6aa8e0c0170530648a7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3-8B/f573843869aa037c3fed.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/1af11de09f41361090d2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/1af11de09f41361090d2.json deleted file mode 100644 index b45d7f7f5bcb73b79bd82bd943c769e26e94a33e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/1af11de09f41361090d2.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/1efefb0ce1047253e234.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/1efefb0ce1047253e234.json deleted file mode 100644 index dc2d8a7de8115892031b7845b0e13867690d68e6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/1efefb0ce1047253e234.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/3214baa099c127fb974d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/3214baa099c127fb974d.json deleted file mode 100644 index d7c54412138645837959441b57e8a945506afd38..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/3214baa099c127fb974d.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/356e8242db0af0e907e1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/356e8242db0af0e907e1.json deleted file mode 100644 index 6441aa3ab223a8758f3fcb2cd7a057922e65bd91..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/356e8242db0af0e907e1.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/56374f055fe625562d0d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/56374f055fe625562d0d.json deleted file mode 100644 index e18fe39d22546179b29fa21283a6da5c5defb03c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/56374f055fe625562d0d.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/d5f45e0b756280d0276d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/d5f45e0b756280d0276d.json deleted file mode 100644 index 2ee82a005fa04f6b348c93865da57c3a91da996e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/meta-llama/Meta-Llama-3.1-8B/d5f45e0b756280d0276d.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/princeton-nlp/Sheared-LLaMA-1.3B/33e5d4a040757289c0f7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/princeton-nlp/Sheared-LLaMA-1.3B/33e5d4a040757289c0f7.json deleted file mode 100644 index 125cc1151840c036516c138c4249833a9f226aab..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/princeton-nlp/Sheared-LLaMA-1.3B/33e5d4a040757289c0f7.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/princeton-nlp/Sheared-LLaMA-1.3B/4cafdc39efdc669697af.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/princeton-nlp/Sheared-LLaMA-1.3B/4cafdc39efdc669697af.json deleted file mode 100644 index 290f224dbe6ecd82997095abbebc7e35f0c938c6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/princeton-nlp/Sheared-LLaMA-1.3B/4cafdc39efdc669697af.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/74c0497cba20383c2965.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/74c0497cba20383c2965.json deleted file mode 100644 index 484805f4bbf64db4174cacc4639d57e37d049973..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/74c0497cba20383c2965.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/7726b0c1841e33ac8fb4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/7726b0c1841e33ac8fb4.json deleted file mode 100644 index 0707f3d835835f2157afca9793bb1bda2850245b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/7726b0c1841e33ac8fb4.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/be88ce8756e95baff44b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/be88ce8756e95baff44b.json deleted file mode 100644 index df5c4f98b4c59492cb0de4cc0acd62a98b36d93a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/be88ce8756e95baff44b.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 5, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/ea6acd2a079e69be7049.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/ea6acd2a079e69be7049.json deleted file mode 100644 index 212f30de874a8e3d982aaf5937a2bf3def86571f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/ea6acd2a079e69be7049.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/1cab6edbf167cfd815cf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/1cab6edbf167cfd815cf.json deleted file mode 100644 index 1a2ebfc997e5a074741c744fc7bb41a8f9ed4e9d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/1cab6edbf167cfd815cf.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/88311eacf5aba325fe03.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/88311eacf5aba325fe03.json deleted file mode 100644 index dea04cf839a1b1bfe64d9ee95980845260f2c3e2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/88311eacf5aba325fe03.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/b61951e49a326bc63739.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/b61951e49a326bc63739.json deleted file mode 100644 index 6396a56c80e45e7650a346ec6ba3bf1e535918d2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/b61951e49a326bc63739.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/phi-4/0c43e36fe933264282d7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/phi-4/0c43e36fe933264282d7.json deleted file mode 100644 index ad73ddd78ba58a8dbd03149d50041af820cd685e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/phi-4/0c43e36fe933264282d7.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/phi-4/d2597a27777c6b10fec6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/phi-4/d2597a27777c6b10fec6.json deleted file mode 100644 index 46e3942ccf6a0af6ab686692fcebffa7298f026d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/phi-4/d2597a27777c6b10fec6.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 10 - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/1f573f4893707e20baea.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/1f573f4893707e20baea.json deleted file mode 100644 index 1cac115418323069d2144eea527e7d80bbf69bcc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/1f573f4893707e20baea.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/2b10d94944cdae0f19a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/2b10d94944cdae0f19a8.json deleted file mode 100644 index 161b42eb8d7dd180a9a5b1b0f87113d16f248e1d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/2b10d94944cdae0f19a8.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/3f985dd5ca05fd50ed07.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/3f985dd5ca05fd50ed07.json deleted file mode 100644 index 870334d473a137bb05259700d813a3bad057bff8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/3f985dd5ca05fd50ed07.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/450ed19be2b2d47ef8ee.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/450ed19be2b2d47ef8ee.json deleted file mode 100644 index f6c1fb3591e46d22ddb7c7accd892732b6e1672c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/450ed19be2b2d47ef8ee.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/581654da72b47b93070d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/581654da72b47b93070d.json deleted file mode 100644 index 972159c013ed8cc2d41b1e12437358344c0c09ea..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/581654da72b47b93070d.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/5d4e246892cde0a54f20.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/5d4e246892cde0a54f20.json deleted file mode 100644 index 779562dc5dfaf0f681f3809d0d5334314268cc0d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/5d4e246892cde0a54f20.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 128, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 128, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 128, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/79a57e92c5d28fbf2c7c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/79a57e92c5d28fbf2c7c.json deleted file mode 100644 index ce0d7b464ab78207a29bbe17527312cba0325d43..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/79a57e92c5d28fbf2c7c.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fuse_qkv": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/a022a3e592eea3ca3b5d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/a022a3e592eea3ca3b5d.json deleted file mode 100644 index 6d7eb0853b75c095a81dc7fb58d38969ac8a98b1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/a022a3e592eea3ca3b5d.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/d2572e7c03f09047aa6c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/d2572e7c03f09047aa6c.json deleted file mode 100644 index 7f8e4466b264cea711a2ac1fe81dd908177e1f2c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/d2572e7c03f09047aa6c.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 128, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 128, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 128, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/e1226cd19c109531ddcf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/e1226cd19c109531ddcf.json deleted file mode 100644 index 9b725bc1356e52a6c666d7ca5cfc122ae49e3214..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/e1226cd19c109531ddcf.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/3cf132ccf09f0a6181a9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/3cf132ccf09f0a6181a9.json deleted file mode 100644 index 19627d66b1801689554fb606b8e9719a025b6e81..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/3cf132ccf09f0a6181a9.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/4b77b7654e69161fb5a4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/4b77b7654e69161fb5a4.json deleted file mode 100644 index d4541cafab63ae41b93a06c6515fd1fa73d96227..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/4b77b7654e69161fb5a4.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/505c7bbc2dcf05e4b29a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/505c7bbc2dcf05e4b29a.json deleted file mode 100644 index fdc067181a16b8efdbdde8e27dd471b3e5612880..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/505c7bbc2dcf05e4b29a.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/7039413699371e8f2bae.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/7039413699371e8f2bae.json deleted file mode 100644 index 933071e04a2958635a912060e9e2a5c50df05c8d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-1.5B/7039413699371e8f2bae.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-14B/189e1f24b1ce805f81a1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-14B/189e1f24b1ce805f81a1.json deleted file mode 100644 index 9f3c74a794db7dd835aa6c1ef5cf7b7354983aed..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-14B/189e1f24b1ce805f81a1.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-14B/42e320cd125e04b0ce8f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-14B/42e320cd125e04b0ce8f.json deleted file mode 100644 index 211ee00d9050c12c0d3044096b460e3e3aa68714..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-14B/42e320cd125e04b0ce8f.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-32B-Instruct/5b36aff0fa38454aed91.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-32B-Instruct/5b36aff0fa38454aed91.json deleted file mode 100644 index 18b4e80842fa9573c3e9b362562efd41446472f9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-32B-Instruct/5b36aff0fa38454aed91.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-32B-Instruct/e8a7d2d9a75703d3e36f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-32B-Instruct/e8a7d2d9a75703d3e36f.json deleted file mode 100644 index ce097ab4a1cd0a215c93953a8a7a4129c4356996..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-32B-Instruct/e8a7d2d9a75703d3e36f.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-72B-Instruct/361d7ffc47ace74811f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-72B-Instruct/361d7ffc47ace74811f1.json deleted file mode 100644 index 2c60acbb4fa9d781c4478e84aa3304c402a97834..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-72B-Instruct/361d7ffc47ace74811f1.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 24 - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-72B-Instruct/8cd3f0c94b036fce05ff.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-72B-Instruct/8cd3f0c94b036fce05ff.json deleted file mode 100644 index df3cd76cabf0f68c722a69eaa51c1f2fe75a7dc4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-72B-Instruct/8cd3f0c94b036fce05ff.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/5fd978eb22527bb79bb2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/5fd978eb22527bb79bb2.json deleted file mode 100644 index 4b7d5daefc060d71542164de9b28075e44c59d5e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/5fd978eb22527bb79bb2.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/7691902d93219a80aebb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/7691902d93219a80aebb.json deleted file mode 100644 index 90bd5b7403270e06da568c23920bfc4a53b30fb0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/7691902d93219a80aebb.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/93ac23ee9e447c6d292d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/93ac23ee9e447c6d292d.json deleted file mode 100644 index 96894751c23cc30d47d56e37fe567de37758c2cd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/93ac23ee9e447c6d292d.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/9dd2921527c5091a4f13.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/9dd2921527c5091a4f13.json deleted file mode 100644 index 9c1d406a093e2fa307e35a9a242fa9ad7efe61bf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/9dd2921527c5091a4f13.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/a2559942b3ead6a9df97.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/a2559942b3ead6a9df97.json deleted file mode 100644 index ac6b1628b7c4f2f92e059c8c88eb60191df0e3cd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/a2559942b3ead6a9df97.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/c654f466f5b284905f41.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/c654f466f5b284905f41.json deleted file mode 100644 index dc15f7c7d74139a93f4a5d45813da9bc83429c1b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/c654f466f5b284905f41.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/fb4aca23519c98ca4a4f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/fb4aca23519c98ca4a4f.json deleted file mode 100644 index 1552b6152b3e6c279e05c7057a9b799fa908a6ed..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-7B-Instruct/fb4aca23519c98ca4a4f.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/37df254bafb83b1acec9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/37df254bafb83b1acec9.json deleted file mode 100644 index 2a3f8a4ebd8331ddd7bde1a6743e39e4e0690dde..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/37df254bafb83b1acec9.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/3cb84f1a731d16a91b6a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/3cb84f1a731d16a91b6a.json deleted file mode 100644 index 407c0aa2c9e9aef1b088ae494988ee4d8186fda2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/3cb84f1a731d16a91b6a.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/7c1b7f662246663151c6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/7c1b7f662246663151c6.json deleted file mode 100644 index f1a8ea26e0a0994f0a040e3d9bb9c072368c6a31..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/7c1b7f662246663151c6.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/840298e38faaea1b2c16.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/840298e38faaea1b2c16.json deleted file mode 100644 index 689e1219d7c4e5651d5a678dd68e8857c97fe164..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/840298e38faaea1b2c16.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/ddeddea0da60491bf0fc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/ddeddea0da60491bf0fc.json deleted file mode 100644 index 24ddbde6f8fbeb14f711f028f6291e244f6e9783..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/ddeddea0da60491bf0fc.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/df0c62b2f501fe668f1d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/df0c62b2f501fe668f1d.json deleted file mode 100644 index 36562d583cfa0417a51ff0594dbaae87e01199b9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/df0c62b2f501fe668f1d.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 16, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/8e008b683f0c311e4955.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/8e008b683f0c311e4955.json deleted file mode 100644 index 9d8c94a0c98406985d2bd939ff9565a491bd8945..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/8e008b683f0c311e4955.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/d4baccc1db1afe0dfbb1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/d4baccc1db1afe0dfbb1.json deleted file mode 100644 index 3c651bb70bbb4e89e59ee66baa8db5d28a002416..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/d4baccc1db1afe0dfbb1.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/02569cd5bd1e32216e8e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/02569cd5bd1e32216e8e.json deleted file mode 100644 index 5afcb4e3bb799fc3a77189ae5dd439db4f1ffb9c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/02569cd5bd1e32216e8e.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/343538e8399db50d5684.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/343538e8399db50d5684.json deleted file mode 100644 index 1122b096b8116cdfe712b5230874cf36a70065b3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/343538e8399db50d5684.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9627eeb462195d3c4fc0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9627eeb462195d3c4fc0.json deleted file mode 100644 index 6adcfb38869053742087cedc18426ff1417ce64a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9627eeb462195d3c4fc0.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9c1031b77d42ae5abd50.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9c1031b77d42ae5abd50.json deleted file mode 100644 index 9469aa765e0d09e527265e38add22606721a5d44..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/9c1031b77d42ae5abd50.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/be9ce236e618e23d37e3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/be9ce236e618e23d37e3.json deleted file mode 100644 index d47017a9fa74f2ee890f31d4dd5cae0a43872a7c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/be9ce236e618e23d37e3.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c20524e61f78915f94a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c20524e61f78915f94a6.json deleted file mode 100644 index 03d4d71fde2fafa1d68e6f1d991dedd581c4a602..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c20524e61f78915f94a6.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c4ba46257a61e559d2e6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c4ba46257a61e559d2e6.json deleted file mode 100644 index 1846edc96cd2424a4a26b980c542d85cde7ee72f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c4ba46257a61e559d2e6.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": false, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev0", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/211d2bf85194cf8d9207.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/211d2bf85194cf8d9207.json deleted file mode 100644 index 06847ba9d7b294919def1dea05faa6f5887c32c9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/211d2bf85194cf8d9207.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen3-1.7B", - "_task": "text-generation", - "architectures": [ - "Qwen3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 6144, - "max_position_embeddings": 40960, - "max_window_layers": 28, - "model_type": "qwen3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen3-1.7B", - "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000, - "sliding_window": null, - "tie_word_embeddings": true, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/f6dbd12c1a06eb5a2084.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/f6dbd12c1a06eb5a2084.json deleted file mode 100644 index c03826e53ce5ed12960558376f7a60bb1481aff6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/f6dbd12c1a06eb5a2084.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen3-1.7B", - "_task": "text-generation", - "architectures": [ - "Qwen3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 6144, - "max_position_embeddings": 40960, - "max_window_layers": 28, - "model_type": "qwen3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen3-1.7B", - "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev0", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000, - "sliding_window": null, - "tie_word_embeddings": true, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json deleted file mode 100644 index 3e167a4933e519119cab99364a00f802616e9ac2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "Jingya/pixart_sigma_pipe_xl_2_512_ms", - "_task": null, - "text_encoder": { - "architectures": [ - "T5EncoderModel" - ], - "classifier_dropout": 0.0, - "d_ff": 10240, - "d_kv": 64, - "d_model": 4096, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 120, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_decoder_layers": 24, - "num_heads": 64, - "num_layers": 24, - "output_past": true, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32128 - }, - "transformer": { - "_class_name": "PixArtTransformer2DModel", - "activation_fn": "gelu-approximate", - "attention_bias": true, - "attention_head_dim": 72, - "attention_type": "default", - "caption_channels": 4096, - "cross_attention_dim": 1152, - "double_self_attention": false, - "dropout": 0.0, - "in_channels": 4, - "interpolation_scale": 1, - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_encoder_hidden_size": 4096, - "static_height": 64, - "static_num_channels": 4, - "static_patch_size": 2, - "static_sequence_length": 120, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_elementwise_affine": false, - "norm_eps": 1e-06, - "norm_num_groups": 32, - "norm_type": "ada_norm_single", - "num_attention_heads": 16, - "num_embeds_ada_norm": 1000, - "num_layers": 28, - "num_vector_embeds": null, - "only_cross_attention": false, - "out_channels": 8, - "patch_size": 2, - "upcast_attention": false, - "use_additional_conditions": null, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json deleted file mode 100644 index e7c9160f8f01fd8dae0ecee77a5051710472b9ec..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "PixArt-alpha/PixArt-XL-2-512x512", - "_task": null, - "text_encoder": { - "architectures": [ - "T5EncoderModel" - ], - "classifier_dropout": 0.0, - "d_ff": 10240, - "d_kv": 64, - "d_model": 4096, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 120, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_decoder_layers": 24, - "num_heads": 64, - "num_layers": 24, - "output_past": true, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32128 - }, - "transformer": { - "_class_name": "PixArtTransformer2DModel", - "activation_fn": "gelu-approximate", - "attention_bias": true, - "attention_head_dim": 72, - "attention_type": "default", - "caption_channels": 4096, - "cross_attention_dim": 1152, - "double_self_attention": false, - "dropout": 0.0, - "in_channels": 4, - "interpolation_scale": null, - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_encoder_hidden_size": 4096, - "static_height": 64, - "static_num_channels": 4, - "static_patch_size": 2, - "static_sequence_length": 120, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_elementwise_affine": false, - "norm_eps": 1e-06, - "norm_num_groups": 32, - "norm_type": "ada_norm_single", - "num_attention_heads": 16, - "num_embeds_ada_norm": 1000, - "num_layers": 28, - "num_vector_embeds": null, - "only_cross_attention": false, - "out_channels": 8, - "patch_size": 2, - "upcast_attention": false, - "use_additional_conditions": null, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json deleted file mode 100644 index d275807cdbfb870a0be53266909b4296b27a64a9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json deleted file mode 100644 index 00eb3567442b2cfd634e41a17799aaecf7ea764c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json deleted file mode 100644 index e570ab13e6bb7e702a8883bdb57fbd286231eeb9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/0563184c338261c6fbaa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/0563184c338261c6fbaa.json deleted file mode 100644 index 85952b928cf206bb293a37c628cb5d451cd451f2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/0563184c338261c6fbaa.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/28dcbb2dce9b3a1604de.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/28dcbb2dce9b3a1604de.json deleted file mode 100644 index bb7d20a3749172630d6fdc7e539c635a9c20cf5b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/28dcbb2dce9b3a1604de.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/65542d3c940a7ea06629.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/65542d3c940a7ea06629.json deleted file mode 100644 index b3de687452c8ce8a63b50fda5b2c39257390d6e0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/65542d3c940a7ea06629.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json deleted file mode 100644 index f9db0fe2b47988867e19cfa9969e63566683b31c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/6c659b1c4f864a345f17.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/6c659b1c4f864a345f17.json deleted file mode 100644 index fb6b0f8cb492cd8e7993b54ac0190e949c9c53b1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/6c659b1c4f864a345f17.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/b987e4e7f5a480cc63a7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/b987e4e7f5a480cc63a7.json deleted file mode 100644 index 1cf30ea260a6530d46d1174096b28feca69f39e6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/b987e4e7f5a480cc63a7.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/bb0f60069cb5e089f6e4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/bb0f60069cb5e089f6e4.json deleted file mode 100644 index e3eb8c6309a6a35242846f53b978e9fd8fd5244a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/bb0f60069cb5e089f6e4.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json deleted file mode 100644 index ed3b38c98d9e1b387b66962b48b9690a173ee94e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/e293aff80a9c1c698ab5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/e293aff80a9c1c698ab5.json deleted file mode 100644 index ff90a6f491a3b62bedbc8ab576104006aa4974e6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/e293aff80a9c1c698ab5.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/f1c5b86d6a1a8829995e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/f1c5b86d6a1a8829995e.json deleted file mode 100644 index 6fd9b3f7ce986c7fcd8d0959056572ae0989c7fc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/f1c5b86d6a1a8829995e.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/0940dfe853c062a30f58.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/0940dfe853c062a30f58.json deleted file mode 100644 index 63b890d95330ec714e05b4263a15b544615a5281..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/0940dfe853c062a30f58.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 8, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/13980a64437f6cc8ce28.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/13980a64437f6cc8ce28.json deleted file mode 100644 index 10303f2b6d7d0e99f42027f26c4514235b388169..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/13980a64437f6cc8ce28.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/868bc6c1a102ae3a578d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/868bc6c1a102ae3a578d.json deleted file mode 100644 index 5ad8381b56e8f0a8c69a974a8623367dc4b03164..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/868bc6c1a102ae3a578d.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/b06f2835447ce22c3cf0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/b06f2835447ce22c3cf0.json deleted file mode 100644 index 4c35bd9c201cd79dadab04649dfcc09a494bfe22..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/b06f2835447ce22c3cf0.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 32, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/f07419a28c681a9c503d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/f07419a28c681a9c503d.json deleted file mode 100644 index 98e2f217343ddf3756b211c03bedfcc1583143c7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-8b-instruct/f07419a28c681a9c503d.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 8 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json deleted file mode 100644 index 41994e12ef5d0f71f9b7a3da3a0390d1cb5c6606..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.3-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct", - "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49159 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/09bd8d258e6ce71f1ec3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/09bd8d258e6ce71f1ec3.json deleted file mode 100644 index f56a3297767d69f19026f6411a7a5879afe931f2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/09bd8d258e6ce71f1ec3.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/4f36e58d85b03f53b3cb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/4f36e58d85b03f53b3cb.json deleted file mode 100644 index 66ea02772d8cdbec88ab5aa36954484c12dc27a7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/4f36e58d85b03f53b3cb.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/815512fea79cd4a5ce29.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/815512fea79cd4a5ce29.json deleted file mode 100644 index 8efd82fe3a544c7312855918a2eab8dcb1e0e1df..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/815512fea79cd4a5ce29.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/90c6215b953d153d6686.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/90c6215b953d153d6686.json deleted file mode 100644 index c58431c05ae26d728927936184ea8708026ad260..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/90c6215b953d153d6686.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/bdc0fcee762a7e036e7f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/bdc0fcee762a7e036e7f.json deleted file mode 100644 index 1830d908a54623895998bc0f08fe79b8d67298fb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/bdc0fcee762a7e036e7f.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c4b2de37297bdcd45380.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c4b2de37297bdcd45380.json deleted file mode 100644 index 5aea9d99db268bcabf471390a56ffd61c9024570..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c4b2de37297bdcd45380.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e4432e0f1cacde946a3f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e4432e0f1cacde946a3f.json deleted file mode 100644 index 9fb1bf1b7b74f2589d7c2e26176fe66a914dc1ed..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e4432e0f1cacde946a3f.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json deleted file mode 100644 index e2199f4bcd5afceef8b436b0604458d7ffa940d1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json deleted file mode 100644 index d06d7489fd2f02e68ccc3db3bc42dc9aab412dab..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json deleted file mode 100644 index 01bdfd8810bccfc48298fe2d8748a300be7808b5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json deleted file mode 100644 index a264242c960e5343432f086a620fa49f1847e6ec..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json deleted file mode 100644 index 1a16723daefaa5f854b0ffaf693f91336bf4590e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "BSH", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json deleted file mode 100644 index 985951ec31d99688d6a9f2f3088913292858a408..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/lmsys/vicuna-7b-v1.5/1ed85ee21363b6720e7b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/lmsys/vicuna-7b-v1.5/1ed85ee21363b6720e7b.json deleted file mode 100644 index 31ee45dbdd0438653e0413957921c94e633af798..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/lmsys/vicuna-7b-v1.5/1ed85ee21363b6720e7b.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/lmsys/vicuna-7b-v1.5/241c46cfeadbeb850f04.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/lmsys/vicuna-7b-v1.5/241c46cfeadbeb850f04.json deleted file mode 100644 index 4253c11c006cb19668c511593275390ac4745206..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/lmsys/vicuna-7b-v1.5/241c46cfeadbeb850f04.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-2-13b-hf/08b3dafc6ceda589bb47.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-2-13b-hf/08b3dafc6ceda589bb47.json deleted file mode 100644 index 4bbbacad8ee3b36a3289ab82c09f9cbf130e68bf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-2-13b-hf/08b3dafc6ceda589bb47.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-2-7b-hf/6bd20f519cb51b45f5ca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-2-7b-hf/6bd20f519cb51b45f5ca.json deleted file mode 100644 index 17b7547ccd7c2f511994764e5396ce1b40991c71..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-2-7b-hf/6bd20f519cb51b45f5ca.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.1-70B-Instruct/0940e6eea6c48108cd7d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.1-70B-Instruct/0940e6eea6c48108cd7d.json deleted file mode 100644 index 3cd8c9b88c106a5c75ac4f7ef51adf65d5d9c4cb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.1-70B-Instruct/0940e6eea6c48108cd7d.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.1-70B-Instruct/b3acc5bba9327f8409a0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.1-70B-Instruct/b3acc5bba9327f8409a0.json deleted file mode 100644 index a0f54f1d421bc63eafd12565399b683bc74b7808..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.1-70B-Instruct/b3acc5bba9327f8409a0.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.2-1B/7fe2ff60952e128ea292.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.2-1B/7fe2ff60952e128ea292.json deleted file mode 100644 index e618ddb9319b884383c84bc29d9047eb13a0c8b9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.2-1B/7fe2ff60952e128ea292.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.2-3B/c02e5956f5c60addcd7a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.2-3B/c02e5956f5c60addcd7a.json deleted file mode 100644 index fc78440ff88f2a83231eb24fee8429203aeff6c2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Llama-3.2-3B/c02e5956f5c60addcd7a.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3-8B/3bb34b46c5b5fab24ccd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3-8B/3bb34b46c5b5fab24ccd.json deleted file mode 100644 index b7882f19c7fd98641593964f9c3d663f8dcb3939..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3-8B/3bb34b46c5b5fab24ccd.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/3b49f465b4190f23c24d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/3b49f465b4190f23c24d.json deleted file mode 100644 index 5974bdab3e5155d7a73db5fb8e22e783fac22f53..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/3b49f465b4190f23c24d.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/9d8ade0dd59a67b84ee0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/9d8ade0dd59a67b84ee0.json deleted file mode 100644 index ea8543e17a11e09e9d5f6ac1f36707a36c75a3b3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/9d8ade0dd59a67b84ee0.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/a1e9306917d592008a0a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/a1e9306917d592008a0a.json deleted file mode 100644 index ced0f45d83c59841776f76d14aed8411e5dd979c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/a1e9306917d592008a0a.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/a3e4a0269f1e5e67c6b9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/a3e4a0269f1e5e67c6b9.json deleted file mode 100644 index 5f7ba6c897dfa4676e9782d1ec0c700303a31743..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/a3e4a0269f1e5e67c6b9.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/c9cceba001530c5a3a11.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/c9cceba001530c5a3a11.json deleted file mode 100644 index 312970ca0e9b0fa9b127d7a2265ef477168616ec..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/c9cceba001530c5a3a11.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/d274ba1c99708335db55.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/d274ba1c99708335db55.json deleted file mode 100644 index 915b05f8991026e54198aeefb877834a17251198..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/meta-llama/Meta-Llama-3.1-8B/d274ba1c99708335db55.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/princeton-nlp/Sheared-LLaMA-1.3B/f3a5bbbf7c2fcb85fb02.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/princeton-nlp/Sheared-LLaMA-1.3B/f3a5bbbf7c2fcb85fb02.json deleted file mode 100644 index f7b153f46245cfe6dc344c3a92c392cda39b112d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/princeton-nlp/Sheared-LLaMA-1.3B/f3a5bbbf7c2fcb85fb02.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/princeton-nlp/Sheared-LLaMA-1.3B/fecfe3833c16d6aaa3d3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/princeton-nlp/Sheared-LLaMA-1.3B/fecfe3833c16d6aaa3d3.json deleted file mode 100644 index 04fe7b0b7fa2895e8798034ebe3f268fb87f24aa..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/princeton-nlp/Sheared-LLaMA-1.3B/fecfe3833c16d6aaa3d3.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json deleted file mode 100644 index 6c4d11132e576f710384e40866d2306385507476..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 128, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 128, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 128, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json deleted file mode 100644 index b37c012b8de4311564d312e0db455b81aa1ce41c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json deleted file mode 100644 index 4cef32eafadc72598528fee29ffc8bd1d3ed53e4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json deleted file mode 100644 index bc22200f7902327a730a4cdba88cd1463733255a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json deleted file mode 100644 index 5d8ded2e0cadb34394415461281d23f65bad2f91..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/5b8ae963e4a07386d1f3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/5b8ae963e4a07386d1f3.json deleted file mode 100644 index a4ba29aae0b83c8e56ad933be2e7f571a8401f72..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/5b8ae963e4a07386d1f3.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6d5db110aa4df2b11b8a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6d5db110aa4df2b11b8a.json deleted file mode 100644 index 74db402beea94e500cb3f4a8f77b9660e0165818..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6d5db110aa4df2b11b8a.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": null, - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6f752fa89bf8a359a0a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6f752fa89bf8a359a0a6.json deleted file mode 100644 index a332177b25387e4493b11f86fd597b9afe84482e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6f752fa89bf8a359a0a6.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/phi-4/d01669d81ec0ec7f0a7f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/phi-4/d01669d81ec0ec7f0a7f.json deleted file mode 100644 index cda7770c7cd799b2c671d79a9c96bd38a8588d2c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/phi-4/d01669d81ec0ec7f0a7f.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/phi-4/ec602483becaca417689.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/phi-4/ec602483becaca417689.json deleted file mode 100644 index bba28dc4f3528b78bf6833e41b2ea8d7eb46fb1d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/phi-4/ec602483becaca417689.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json deleted file mode 100644 index 51ecee0d74bf7f4205108309ea56c4e5a4ac9d3d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 1, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json deleted file mode 100644 index 0b39e09fdd981bd22b3a2220391784a1cc253c19..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 2, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json deleted file mode 100644 index 67faa73f3256763a9e6d2147fd2c2b98966f7c42..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": false, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "fp16", - "batch_size": 1, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "collectives_layout": "HSB", - "continuous_batching": false, - "fuse_qkv": true, - "group_query_attention": "replicated-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev1", - "output_all_logits": false, - "sequence_length": 100, - "tp_degree": 2 - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/00660417f67f0c05b792.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/00660417f67f0c05b792.json deleted file mode 100644 index e6cf29bcf89f034d1272b9e8ef6ad0484906d52b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/00660417f67f0c05b792.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/42260dd4669313654033.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/42260dd4669313654033.json deleted file mode 100644 index 5ade3e11dc7a79f238a8f30cae33d74e5dfed879..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/42260dd4669313654033.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f449a39c06210b4b51a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f449a39c06210b4b51a.json deleted file mode 100644 index c556c037122dfbad48571b48299a6826f1b2fabf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f449a39c06210b4b51a.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json deleted file mode 100644 index 860b69d18a402bcfa80933283e8fc5a4a66395d0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 128, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 128, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 128, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json deleted file mode 100644 index 93be311cacd8ddda195f8cec66372b7830815ef7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 128, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 128, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 128, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json deleted file mode 100644 index aace1a40cdc895d5610a7fa6a9c9f41528481741..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 1, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 128, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 128, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 128, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 1, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-1.5B/46a8883ba1546c233324.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-1.5B/46a8883ba1546c233324.json deleted file mode 100644 index 9f74037ccde9f933f2dd18d1640e43c784f0b4c3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-1.5B/46a8883ba1546c233324.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-1.5B/d1fe5db842c1dfc03018.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-1.5B/d1fe5db842c1dfc03018.json deleted file mode 100644 index dc5b7792eef4d6167bd15acf62707cb43a0564e8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-1.5B/d1fe5db842c1dfc03018.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-14B/382fcb01d588f88771cd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-14B/382fcb01d588f88771cd.json deleted file mode 100644 index 4faaac310f021d39e9d443cddc51f7f936ab3bda..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-14B/382fcb01d588f88771cd.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-14B/be5eb722c9eece640652.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-14B/be5eb722c9eece640652.json deleted file mode 100644 index 12f47b78d86d9ac75f990d65c417fc497eb833d6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-14B/be5eb722c9eece640652.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-32B-Instruct/6132504bc792f154c158.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-32B-Instruct/6132504bc792f154c158.json deleted file mode 100644 index c6a1a41537b42b625f19655b4a42c36f2b86fd8d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-32B-Instruct/6132504bc792f154c158.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-32B-Instruct/fe3e258ef96be7ce5e90.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-32B-Instruct/fe3e258ef96be7ce5e90.json deleted file mode 100644 index b3c651f7189bf38c77807851e345eff3f82887d1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-32B-Instruct/fe3e258ef96be7ce5e90.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-72B-Instruct/b2a8c0e4fbc830460cd8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-72B-Instruct/b2a8c0e4fbc830460cd8.json deleted file mode 100644 index 0ee1640f82ebfce8123712368a97fe11a308a8e2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-72B-Instruct/b2a8c0e4fbc830460cd8.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/1ade96ad819a7bad64eb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/1ade96ad819a7bad64eb.json deleted file mode 100644 index cf4ff7d2d95b49065b76f831748248bddce2d34b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/1ade96ad819a7bad64eb.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/4b31d55cb498352e888f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/4b31d55cb498352e888f.json deleted file mode 100644 index 385a31ff665da69aba61dda4b664b653a35ec482..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/4b31d55cb498352e888f.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/5c6df385dab7a5f7ba44.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/5c6df385dab7a5f7ba44.json deleted file mode 100644 index 911f63142aefe02124770797b4728d493ad00dc5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/5c6df385dab7a5f7ba44.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/e2149dff42085cf607e1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/e2149dff42085cf607e1.json deleted file mode 100644 index 79eb6d5f3923e9cd353df12a7fd7da2d0ccd98ba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-7B-Instruct/e2149dff42085cf607e1.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/24d832cfe8218e622328.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/24d832cfe8218e622328.json deleted file mode 100644 index 096137f2294bbf8554ef23698696b8a86faaf814..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/24d832cfe8218e622328.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/6198316ef213f0114312.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/6198316ef213f0114312.json deleted file mode 100644 index 5633f48be32cafb5b6d232b3b769e5f736ab85df..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/6198316ef213f0114312.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/0800237a1ef47a8ffd25.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/0800237a1ef47a8ffd25.json deleted file mode 100644 index 3aab033b56f251bebef90d281b8b09c704a0e460..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/0800237a1ef47a8ffd25.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/7dcca96e08db45b0ce15.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/7dcca96e08db45b0ce15.json deleted file mode 100644 index 88e50823d71826488fa9517ca58973b5f5e8468a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/7dcca96e08db45b0ce15.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/19800d32466a804031cb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/19800d32466a804031cb.json deleted file mode 100644 index 1e5ef97f6e52ebdbb6808422955a314eac60d64c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/19800d32466a804031cb.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/343e6fdcc1a57c199073.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/343e6fdcc1a57c199073.json deleted file mode 100644 index 3dbf7f46e08b369a941f4318213610f6e3281d1c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/343e6fdcc1a57c199073.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/008f9050cb333d9c4e07.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/008f9050cb333d9c4e07.json deleted file mode 100644 index 7c43379d8dc320236287fb43cdcd8a5f7c625f27..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/008f9050cb333d9c4e07.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/361b8ba2180401e6292e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/361b8ba2180401e6292e.json deleted file mode 100644 index 4b163ebc71b6d6f9562918210369594bce9b6f97..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/361b8ba2180401e6292e.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/7a3793c136f2b226d9d1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/7a3793c136f2b226d9d1.json deleted file mode 100644 index bfc72e8cd3941ee768bcdd7bbdb652dc1e391f07..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/7a3793c136f2b226d9d1.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c51d828c741d0f7a54ef.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c51d828c741d0f7a54ef.json deleted file mode 100644 index 57cf6fbdfbf4df85fc944b1281a4cb0b59120f1e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c51d828c741d0f7a54ef.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json deleted file mode 100644 index 625d08dca70ac1b81a75b7fef2f459399a6f6532..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json deleted file mode 100644 index 45276a6595cd3b61f50b7c8e45f6b662140c0c4d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json deleted file mode 100644 index f8e969e38469674a00f91fb87a1df44c82526340..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen3/Qwen/Qwen3-1.7B/baf33bdd4a8de9a04620.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen3/Qwen/Qwen3-1.7B/baf33bdd4a8de9a04620.json deleted file mode 100644 index a356113a6d590db9eb7348d7d25fb79685e2574f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen3/Qwen/Qwen3-1.7B/baf33bdd4a8de9a04620.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen3-1.7B", - "_task": "text-generation", - "architectures": [ - "Qwen3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 6144, - "max_position_embeddings": 40960, - "max_window_layers": 28, - "model_type": "qwen3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen3-1.7B", - "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev1", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000, - "sliding_window": null, - "tie_word_embeddings": true, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json deleted file mode 100644 index 66630fe558766028b5435553aa329a14e7a13241..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json deleted file mode 100644 index 3e167a4933e519119cab99364a00f802616e9ac2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "Jingya/pixart_sigma_pipe_xl_2_512_ms", - "_task": null, - "text_encoder": { - "architectures": [ - "T5EncoderModel" - ], - "classifier_dropout": 0.0, - "d_ff": 10240, - "d_kv": 64, - "d_model": 4096, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 120, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_decoder_layers": 24, - "num_heads": 64, - "num_layers": 24, - "output_past": true, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32128 - }, - "transformer": { - "_class_name": "PixArtTransformer2DModel", - "activation_fn": "gelu-approximate", - "attention_bias": true, - "attention_head_dim": 72, - "attention_type": "default", - "caption_channels": 4096, - "cross_attention_dim": 1152, - "double_self_attention": false, - "dropout": 0.0, - "in_channels": 4, - "interpolation_scale": 1, - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_encoder_hidden_size": 4096, - "static_height": 64, - "static_num_channels": 4, - "static_patch_size": 2, - "static_sequence_length": 120, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_elementwise_affine": false, - "norm_eps": 1e-06, - "norm_num_groups": 32, - "norm_type": "ada_norm_single", - "num_attention_heads": 16, - "num_embeds_ada_norm": 1000, - "num_layers": 28, - "num_vector_embeds": null, - "only_cross_attention": false, - "out_channels": 8, - "patch_size": 2, - "upcast_attention": false, - "use_additional_conditions": null, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json deleted file mode 100644 index e7c9160f8f01fd8dae0ecee77a5051710472b9ec..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "PixArt-alpha/PixArt-XL-2-512x512", - "_task": null, - "text_encoder": { - "architectures": [ - "T5EncoderModel" - ], - "classifier_dropout": 0.0, - "d_ff": 10240, - "d_kv": 64, - "d_model": 4096, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 120, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_decoder_layers": 24, - "num_heads": 64, - "num_layers": 24, - "output_past": true, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32128 - }, - "transformer": { - "_class_name": "PixArtTransformer2DModel", - "activation_fn": "gelu-approximate", - "attention_bias": true, - "attention_head_dim": 72, - "attention_type": "default", - "caption_channels": 4096, - "cross_attention_dim": 1152, - "double_self_attention": false, - "dropout": 0.0, - "in_channels": 4, - "interpolation_scale": null, - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_encoder_hidden_size": 4096, - "static_height": 64, - "static_num_channels": 4, - "static_patch_size": 2, - "static_sequence_length": 120, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_elementwise_affine": false, - "norm_eps": 1e-06, - "norm_num_groups": 32, - "norm_type": "ada_norm_single", - "num_attention_heads": 16, - "num_embeds_ada_norm": 1000, - "num_layers": 28, - "num_vector_embeds": null, - "only_cross_attention": false, - "out_channels": 8, - "patch_size": 2, - "upcast_attention": false, - "use_additional_conditions": null, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3722d0e82203fbbe93fe.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3722d0e82203fbbe93fe.json deleted file mode 100644 index 3691c901541f069077de436fafb30a523f5cfab6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3722d0e82203fbbe93fe.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a8be13be525f2d91669b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a8be13be525f2d91669b.json deleted file mode 100644 index a7a1b611044c6cb7a27610aaddfa11009c0e9f21..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a8be13be525f2d91669b.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ca40c0099b06c7de4aa6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ca40c0099b06c7de4aa6.json deleted file mode 100644 index 9d5353428471822248dcf59e562f129da521aad1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ca40c0099b06c7de4aa6.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "attention_multiplier": 1.0, - "embedding_multiplier": 1.0, - "hidden_act": "silu", - "hidden_size": 32, - "initializer_range": 0.02, - "intermediate_size": 64, - "logits_scaling": 1.0, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", - "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "residual_multiplier": 1.0, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 49152 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/19901c0d5a9c42f170c4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/19901c0d5a9c42f170c4.json deleted file mode 100644 index 5c41b24cdb22ecb767b7aa37637bb7fa3b478fd7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/19901c0d5a9c42f170c4.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/81f8ae2315c11fa1dca0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/81f8ae2315c11fa1dca0.json deleted file mode 100644 index e8b58b45533a0abe382404166f30c6574d94938f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/81f8ae2315c11fa1dca0.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/9e2ec383e6c9820f6d7c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/9e2ec383e6c9820f6d7c.json deleted file mode 100644 index e6efcd4bca43241c5d9ef2fbf1c00c400a74c770..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/9e2ec383e6c9820f6d7c.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/a37e391c7f1d161aa58a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/a37e391c7f1d161aa58a.json deleted file mode 100644 index ec863893808da6dc55f87211724007fdec736487..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/a37e391c7f1d161aa58a.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e3ae33ec4036373b3782.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e3ae33ec4036373b3782.json deleted file mode 100644 index 60cd8556db54e5916e2e1da94b78d36d021cb9ba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e3ae33ec4036373b3782.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "HloNeuronConfig", - "all_reduce_dtype": null, - "allow_flash_attention": true, - "attention_layout": "HSB", - "attn_output_transposed": false, - "auto_cast_type": "bf16", - "batch_size": 4, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "collectives_layout": "HSB", - "continuous_batching": true, - "fuse_qkv": true, - "group_query_attention": "shard-over-heads", - "log_softmax_scores": false, - "neuronxcc_version": "2.17.194.0+d312836f", - "optimum_neuron_version": "0.3.0.dev2", - "output_all_logits": false, - "sequence_length": 4096, - "tp_degree": 2 - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e9f8f0f0637d7010a52b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e9f8f0f0637d7010a52b.json deleted file mode 100644 index b76ff8d083ba05d966dfe13a683d67f289b40e9c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e9f8f0f0637d7010a52b.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/2b1cec8d5e79797b25bb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/2b1cec8d5e79797b25bb.json deleted file mode 100644 index bb000dc7f2d073b456e0a8634498a9886c227372..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/2b1cec8d5e79797b25bb.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/51856711562a87bfdeae.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/51856711562a87bfdeae.json deleted file mode 100644 index 274d3935511e0236c5b98a0c04c24895513a6093..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/51856711562a87bfdeae.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/7961c44cbc9389d17902.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/7961c44cbc9389d17902.json deleted file mode 100644 index 56f92da0a637186de3c7176837031fd85832aab8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/7961c44cbc9389d17902.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/96537b57433542d7bb15.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/96537b57433542d7bb15.json deleted file mode 100644 index f3c7ee3b82f81917db81a1a5594c1c6bf74867fc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/96537b57433542d7bb15.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/ab3fbec5d429fde9225b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/ab3fbec5d429fde9225b.json deleted file mode 100644 index 50c7a554220b9a5b502366c7bee54c603e95e1d3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-8b-instruct/ab3fbec5d429fde9225b.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/4231dd4742c9da6fe833.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/4231dd4742c9da6fe833.json deleted file mode 100644 index 5f443445a17b39b9ebbb6d1961129bb2d708b42e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/4231dd4742c9da6fe833.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/219e58efac45b11c6ce0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/219e58efac45b11c6ce0.json deleted file mode 100644 index 924e5e29f9111c899f81b328c04baaec5a85297a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/219e58efac45b11c6ce0.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/36ff58b9aa960afe2c27.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/36ff58b9aa960afe2c27.json deleted file mode 100644 index d4dc7a90f9e6edf2576e97295e75d5acc2cfe14e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/36ff58b9aa960afe2c27.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/399a7f0fa21b68c64a9d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/399a7f0fa21b68c64a9d.json deleted file mode 100644 index 7f7806d9c4e1e952bd1fb30a5585cc2b22e22b5f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/399a7f0fa21b68c64a9d.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/56e3ddea3c58aa2dc37d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/56e3ddea3c58aa2dc37d.json deleted file mode 100644 index 8ebe0ca6d3c40ea32734fe26505cd24c8631a442..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/56e3ddea3c58aa2dc37d.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/68b929285a6284c320a7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/68b929285a6284c320a7.json deleted file mode 100644 index c45fc8ee431faa401517aa079f288fe955a1ac48..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/68b929285a6284c320a7.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7d573e45904144e84fa3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7d573e45904144e84fa3.json deleted file mode 100644 index e790dc6101f74e71f5e77268174216b22b221338..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7d573e45904144e84fa3.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/88e2b78f6e96cc7ef02a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/88e2b78f6e96cc7ef02a.json deleted file mode 100644 index 1ba3191b82fa9f831793fc40699c2bf411d732e9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/88e2b78f6e96cc7ef02a.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json deleted file mode 100644 index 0aec4766869e27a9d7e325c11ded5bc4719f15d9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json deleted file mode 100644 index f9e33a0b528098aa1de20a5c6ef47f68185ff6f6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json deleted file mode 100644 index 2ed4bbbe004aabc578e6aacbfb90b8894fd35683..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/ec2b5e8bc22f267c16fe.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/ec2b5e8bc22f267c16fe.json deleted file mode 100644 index c35b5953e1121a9eafda62ec6862e5912bab8856..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/ec2b5e8bc22f267c16fe.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "llamafactory/tiny-random-Llama-3", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 4, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 64, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "llamafactory/tiny-random-Llama-3", - "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 131072, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 131072, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 131072, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/lmsys/vicuna-7b-v1.5/4d0a6c12961ecddc38f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/lmsys/vicuna-7b-v1.5/4d0a6c12961ecddc38f1.json deleted file mode 100644 index 7ba099142d7f29f19a1578379020e9fedceb110b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/lmsys/vicuna-7b-v1.5/4d0a6c12961ecddc38f1.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/lmsys/vicuna-7b-v1.5/b54891a80a1644741fc0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/lmsys/vicuna-7b-v1.5/b54891a80a1644741fc0.json deleted file mode 100644 index 57f078439e5a2d7b9fe0b50b3d5a8a35b66d685d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/lmsys/vicuna-7b-v1.5/b54891a80a1644741fc0.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-2-13b-hf/1323edd1ad44153954c6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-2-13b-hf/1323edd1ad44153954c6.json deleted file mode 100644 index 325cb6252e1965dfbebaa8f72e3f148ad7443329..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-2-13b-hf/1323edd1ad44153954c6.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-2-7b-hf/06e50556140c915c54c9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-2-7b-hf/06e50556140c915c54c9.json deleted file mode 100644 index 25c583b78ba5d7e955c9fe07084c0b7ce28b32fc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-2-7b-hf/06e50556140c915c54c9.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.1-70B-Instruct/8f369923b7dde523131b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.1-70B-Instruct/8f369923b7dde523131b.json deleted file mode 100644 index 76a1801e1ef1c400c2ed05052fec409388cf1f3d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.1-70B-Instruct/8f369923b7dde523131b.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.1-70B-Instruct/afec7318f4cc4abdd520.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.1-70B-Instruct/afec7318f4cc4abdd520.json deleted file mode 100644 index 7a8caa41a5689e91d0ced034ef420d6129eb9f93..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.1-70B-Instruct/afec7318f4cc4abdd520.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-1B/7571eb4b3ad1107fc762.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-1B/7571eb4b3ad1107fc762.json deleted file mode 100644 index f2ac8a6059f4776ee14d720cc0e00c75acb9ca8a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-1B/7571eb4b3ad1107fc762.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-1B/dd72ee16b10a1945d18b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-1B/dd72ee16b10a1945d18b.json deleted file mode 100644 index a42c3e239dbb14f1e979b833cf2d546da2f99ce9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-1B/dd72ee16b10a1945d18b.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-3B/502ca5d0a22e8ad3a65f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-3B/502ca5d0a22e8ad3a65f.json deleted file mode 100644 index 213ceebbefc880eef28b7cf7b1e7c96872449483..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-3B/502ca5d0a22e8ad3a65f.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-3B/b6c271ee6ea13d107faf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-3B/b6c271ee6ea13d107faf.json deleted file mode 100644 index 1ea009596f27ef98e7d189614e180849adc46715..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Llama-3.2-3B/b6c271ee6ea13d107faf.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3-8B/e0ba69fa383afd51f115.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3-8B/e0ba69fa383afd51f115.json deleted file mode 100644 index 0623b3228d27f2ee81eabcf26e0c516c24a8e3de..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3-8B/e0ba69fa383afd51f115.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/118f46b7a5142cad2244.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/118f46b7a5142cad2244.json deleted file mode 100644 index 88adb3a7312b68df7317c8022aed04c7e1f61fdf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/118f46b7a5142cad2244.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/1d8b590856c546768acc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/1d8b590856c546768acc.json deleted file mode 100644 index f2bdbb3698daa60e483173177b7096b7d585e3ef..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/1d8b590856c546768acc.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/213412a30bd525269823.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/213412a30bd525269823.json deleted file mode 100644 index 3b7bea2493fef894da49b7a1cf5d139b37122c2e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/213412a30bd525269823.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/220f7aab4b2dc7f189e4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/220f7aab4b2dc7f189e4.json deleted file mode 100644 index 8ef1b30d7c46f8a5febf1ca1c967ce75f66c552a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/220f7aab4b2dc7f189e4.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/27409550f7813e4800b8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/27409550f7813e4800b8.json deleted file mode 100644 index 5b2d50d563e3129becfa6db354a61d9039905b36..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/27409550f7813e4800b8.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/667cf1cbfb32e7fe11b8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/667cf1cbfb32e7fe11b8.json deleted file mode 100644 index 7c85dd754cbfc1369bc3d0ad788db06edbdd9865..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/667cf1cbfb32e7fe11b8.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/956a7079a7db1ae8dd61.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/956a7079a7db1ae8dd61.json deleted file mode 100644 index 3aadf6644b397b64e5fd15e09f04ed33f8d22fc5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/meta-llama/Meta-Llama-3.1-8B/956a7079a7db1ae8dd61.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/princeton-nlp/Sheared-LLaMA-1.3B/07607a1b708c9dbaacd0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/princeton-nlp/Sheared-LLaMA-1.3B/07607a1b708c9dbaacd0.json deleted file mode 100644 index ff3a789bfc97f3881103418222dfcba236a51a1e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/princeton-nlp/Sheared-LLaMA-1.3B/07607a1b708c9dbaacd0.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/princeton-nlp/Sheared-LLaMA-1.3B/2932926dfadfe379c837.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/princeton-nlp/Sheared-LLaMA-1.3B/2932926dfadfe379c837.json deleted file mode 100644 index 3b0a1ddd1ad65a2423376ba5c7c79cf76978240f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/princeton-nlp/Sheared-LLaMA-1.3B/2932926dfadfe379c837.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/078092168933c6413d2a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/078092168933c6413d2a.json deleted file mode 100644 index 335268f83626ee7106717c2c28ff205f0fce267f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/078092168933c6413d2a.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/0a8784a00d0c8111b947.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/0a8784a00d0c8111b947.json deleted file mode 100644 index 89a2676c044d205cefe2c26a8d6334b584d46137..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/0a8784a00d0c8111b947.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": null, - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 5, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3413a608b29245feb044.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3413a608b29245feb044.json deleted file mode 100644 index b0418adc9d0cfdcf27d1f3d189d90c1743f9a2b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3413a608b29245feb044.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "unsloth/Llama-3.2-1B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", - "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "unsloth_fixed": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json deleted file mode 100644 index 83d9d035bbbd882bafa5a83c36fa382d406a5649..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json deleted file mode 100644 index 5bcd9cc3ba289b63a355d97b2902e5946e2c547e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a9199743c35e18e3bd0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a9199743c35e18e3bd0.json deleted file mode 100644 index 94280ab241953e8ee41eea5696b2a323dfe60111..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a9199743c35e18e3bd0.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "dacorvo/Mixtral-tiny", - "_task": "text-generation", - "architectures": [ - "MixtralForCausalLM" - ], - "attention_dropout": 0.0, - "head_dim": 32, - "hidden_act": "silu", - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 3584, - "max_position_embeddings": 1024, - "model_type": "mixtral", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "dacorvo/Mixtral-tiny", - "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_experts_per_tok": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 8, - "num_local_experts": 8, - "output_router_logits": false, - "rms_norm_eps": 1e-05, - "rope_theta": 10000.0, - "router_aux_loss_coef": 0.001, - "router_jitter_noise": 0.0, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/Phi-3-mini-4k-instruct/47e20aeaf86910c15386.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/Phi-3-mini-4k-instruct/47e20aeaf86910c15386.json deleted file mode 100644 index cd619e4e19bd96cd4ad8e11d0b3d381fe41bd97b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/Phi-3-mini-4k-instruct/47e20aeaf86910c15386.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/Phi-3-mini-4k-instruct/9855138a09e67a456596.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/Phi-3-mini-4k-instruct/9855138a09e67a456596.json deleted file mode 100644 index 111f4bddaf209e908b287ec13f0c27513fa185db..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/Phi-3-mini-4k-instruct/9855138a09e67a456596.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/phi-4/835d840954b30b1f79d0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/phi-4/835d840954b30b1f79d0.json deleted file mode 100644 index 9b5a261ff58854f2ad4102213d0c47fa831d4707..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/phi-4/835d840954b30b1f79d0.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/phi-4/cde49caab3dc0cdc00a4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/phi-4/cde49caab3dc0cdc00a4.json deleted file mode 100644 index fdd55916bb52ead4d0c61943b74af90beabf8c22..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/microsoft/phi-4/cde49caab3dc0cdc00a4.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/1a5b82eb620bbc773cea.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/1a5b82eb620bbc773cea.json deleted file mode 100644 index 8295f5730c425f6792402975af5efaaaed0fa6ed..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/1a5b82eb620bbc773cea.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/8961aa887fe7e291ece4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/8961aa887fe7e291ece4.json deleted file mode 100644 index 58fbc3eac28b34130a54e22178e39d4bcc7698fe..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/8961aa887fe7e291ece4.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/9ea48b55a0a83cfc7c31.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/9ea48b55a0a83cfc7c31.json deleted file mode 100644 index 40e356156195f024a84c928d5a71338c7efb695d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/9ea48b55a0a83cfc7c31.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/phi-4-tiny-random", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": {}, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 16, - "initializer_range": 0.02, - "intermediate_size": 32, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "yujiepan/phi-4-tiny-random", - "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 2, - "num_hidden_layers": 2, - "num_key_value_heads": 1, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/184ac9147c5c3a01108b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/184ac9147c5c3a01108b.json deleted file mode 100644 index 35f7f17908fbf61e1a6901a743e7d029e50e7b86..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/184ac9147c5c3a01108b.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 128, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 128, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 128, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/3e331608acdbb5dee764.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/3e331608acdbb5dee764.json deleted file mode 100644 index fcd8feb9e5051ca0559f4943e9c3564794820cb2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/3e331608acdbb5dee764.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/b00fda22f866de8888a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/b00fda22f866de8888a8.json deleted file mode 100644 index 8cdd45b341a9b5dc47ca8a1874a36c21324b5037..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/b00fda22f866de8888a8.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/b274da74f7d452704179.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/b274da74f7d452704179.json deleted file mode 100644 index c1e066ea554df889d980f2fe8f8327b8de5caf03..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/b274da74f7d452704179.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-1.5B/7f51688ffa8ba9af8bb3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-1.5B/7f51688ffa8ba9af8bb3.json deleted file mode 100644 index 0d9dc4b4238c7f27df70a1abf80519ab87df872d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-1.5B/7f51688ffa8ba9af8bb3.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-1.5B/8d65dbd22221a91773c9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-1.5B/8d65dbd22221a91773c9.json deleted file mode 100644 index 9bd4acaed86463d11aacb60939ea24ec5cfac012..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-1.5B/8d65dbd22221a91773c9.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-14B/036b60e46872d9ec4580.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-14B/036b60e46872d9ec4580.json deleted file mode 100644 index b10b72e27cf3f7e7d2c59db4c926ad519a4f7b61..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-14B/036b60e46872d9ec4580.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-14B/2c36ca4caf5ff7f192c8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-14B/2c36ca4caf5ff7f192c8.json deleted file mode 100644 index 1b61ee2c25b5510595cacbf37ee36774a1d6b8f9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-14B/2c36ca4caf5ff7f192c8.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-32B-Instruct/25d21389627244f9b591.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-32B-Instruct/25d21389627244f9b591.json deleted file mode 100644 index d8c354ce5f3ec55810776d9839975ba952a9c1e8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-32B-Instruct/25d21389627244f9b591.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-32B-Instruct/375b0a9305544f168743.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-32B-Instruct/375b0a9305544f168743.json deleted file mode 100644 index cfde8c4f11d12e75293cc55fc369d380ab5d241a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-32B-Instruct/375b0a9305544f168743.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-72B-Instruct/34b63559454432fe45b3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-72B-Instruct/34b63559454432fe45b3.json deleted file mode 100644 index 6321e24ec5be570f2a0c71f1d77866f3cdf22a0a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-72B-Instruct/34b63559454432fe45b3.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/0d9c495a1e465556a91d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/0d9c495a1e465556a91d.json deleted file mode 100644 index 44889b55784417b3e757d327227c9374ba16838f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/0d9c495a1e465556a91d.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/4c53a29a394f2074f58c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/4c53a29a394f2074f58c.json deleted file mode 100644 index 595ef0da88cda58bbafef1bcbd5211adef5e5f8c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/4c53a29a394f2074f58c.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/4d81332a6903b7d6537a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/4d81332a6903b7d6537a.json deleted file mode 100644 index 819139979ea904ec37f8f4baad1696e6b7e6b257..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/4d81332a6903b7d6537a.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/984962df207ed41a7b86.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/984962df207ed41a7b86.json deleted file mode 100644 index be55c6462658f4195bbe84c276b584d480ff517d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/984962df207ed41a7b86.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/ebf75edb46237363ff9d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/ebf75edb46237363ff9d.json deleted file mode 100644 index d12a1d2204e6b933d05b44916887bd8f037066b9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-7B-Instruct/ebf75edb46237363ff9d.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/9a4cd5b45295f6bd07c0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/9a4cd5b45295f6bd07c0.json deleted file mode 100644 index 1614bf753087a460b090bd4e709cfb6ba671a429..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/9a4cd5b45295f6bd07c0.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/a86fe230f968916be18a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/a86fe230f968916be18a.json deleted file mode 100644 index 01f70f9482b27046d3b6f037900aa83b060ebe14..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/a86fe230f968916be18a.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/7676c8e2bc0f9a04403d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/7676c8e2bc0f9a04403d.json deleted file mode 100644 index 3b13acaed784408284a73d7aa83ae50057ec3e13..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/7676c8e2bc0f9a04403d.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/d5755d7a59c904f6a3e5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/d5755d7a59c904f6a3e5.json deleted file mode 100644 index 9e304e2d72aba3b1a7d6aaa1b971a0ce88aad944..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/d5755d7a59c904f6a3e5.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/356750550b6e23493242.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/356750550b6e23493242.json deleted file mode 100644 index d94d1a54d010217299ffe2909d50e691ac56bd69..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/356750550b6e23493242.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/86495872ba06b5783f64.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/86495872ba06b5783f64.json deleted file mode 100644 index a0928b3e701658e67800cd7f06426652b76de313..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/86495872ba06b5783f64.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4ec9873a07600ce1e7c2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4ec9873a07600ce1e7c2.json deleted file mode 100644 index cec7f2493282580ac15e0f2d1a579783616a35b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4ec9873a07600ce1e7c2.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6c8f9fafea0984337d99.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6c8f9fafea0984337d99.json deleted file mode 100644 index 54d30bbe8165b887609aea6cd074d3e31bee9ad4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/6c8f9fafea0984337d99.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/7dfb589fa6f2a3762d90.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/7dfb589fa6f2a3762d90.json deleted file mode 100644 index c0b84a5cceb3642c2f83db203f65aa16de13836c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/7dfb589fa6f2a3762d90.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/85d01b61617028614074.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/85d01b61617028614074.json deleted file mode 100644 index b89947ab76f20f4e71294ba019728bb5c5f379f7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/85d01b61617028614074.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/addc5bb27f50ceea942c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/addc5bb27f50ceea942c.json deleted file mode 100644 index 8043a95ba49ed8d61dea6a59b2aa41434c9628c8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/addc5bb27f50ceea942c.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/50d867d7bf6414aa7f5c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/50d867d7bf6414aa7f5c.json deleted file mode 100644 index f974a74db59886b4f746e5025a150a95bcd1d1d1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/50d867d7bf6414aa7f5c.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 2, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 2, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6762cbc52990269daa58.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6762cbc52990269daa58.json deleted file mode 100644 index c3e410477517b4dfc804c32715b0a88df964dcc1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6762cbc52990269daa58.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/899cd61a155b97ddd046.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/899cd61a155b97ddd046.json deleted file mode 100644 index 71da86815cb70f248396abeea8d175652b2dfaa4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/899cd61a155b97ddd046.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "yujiepan/qwen2.5-128k-tiny-random", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8, - "initializer_range": 0.02, - "intermediate_size": 16, - "max_position_embeddings": 32768, - "max_window_layers": 1, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", - "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 100, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 100, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 100, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 4, - "num_hidden_layers": 2, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": { - "factor": 4.0, - "original_max_position_embeddings": 32768, - "rope_type": "yarn", - "type": "yarn" - }, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen3/Qwen/Qwen3-1.7B/079acd550201c732fce8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen3/Qwen/Qwen3-1.7B/079acd550201c732fce8.json deleted file mode 100644 index 74af2193065020be3ee3b365d1441205db58fda2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen3/Qwen/Qwen3-1.7B/079acd550201c732fce8.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen3-1.7B", - "_task": "text-generation", - "architectures": [ - "Qwen3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 6144, - "max_position_embeddings": 40960, - "max_window_layers": 28, - "model_type": "qwen3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen3-1.7B", - "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev2", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000, - "sliding_window": null, - "tie_word_embeddings": true, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json deleted file mode 100644 index 66630fe558766028b5435553aa329a14e7a13241..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/1323e0216a6a2d89baf2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/1323e0216a6a2d89baf2.json deleted file mode 100644 index f55cef5404ad188bebc49a6716331ce98fb36a6c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/1323e0216a6a2d89baf2.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/1ac47c151b5b76119aa0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/1ac47c151b5b76119aa0.json deleted file mode 100644 index ec42f81d7ab9a84a2db9ad2180e0236279b90a64..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/1ac47c151b5b76119aa0.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/d44f51e68762485c4976.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/d44f51e68762485c4976.json deleted file mode 100644 index 88d21e324710d25790a806bc8c67200c9664326c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/d44f51e68762485c4976.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/e00e0e867ce96d6a4b42.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/e00e0e867ce96d6a4b42.json deleted file mode 100644 index 26d55f32a191e4dd34b7008f25c523354f74a311..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/e00e0e867ce96d6a4b42.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/e5a2e9968aad27e4b19e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/e5a2e9968aad27e4b19e.json deleted file mode 100644 index a834241ac72d623c1727a4391284929315479bf7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/e5a2e9968aad27e4b19e.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/79ff18bae1b067116094.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/79ff18bae1b067116094.json deleted file mode 100644 index 5f644ccb2821977297ce62acf5119c8b11cdf603..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/79ff18bae1b067116094.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/cb8d36af17d1358debdf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/cb8d36af17d1358debdf.json deleted file mode 100644 index 79aa2162d294d1909769b427bee231bc5e041203..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/cb8d36af17d1358debdf.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/d65be9f26609a55a8007.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/d65be9f26609a55a8007.json deleted file mode 100644 index a73b26080cac4d803e4fa6332e74cd9e1552612f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/d65be9f26609a55a8007.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/e88c6b9e2a384492e07c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/e88c6b9e2a384492e07c.json deleted file mode 100644 index 9b39cb0ee8716cdbd27b5a059370571937162011..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/e88c6b9e2a384492e07c.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/ed58570cd1d649dae5ec.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/ed58570cd1d649dae5ec.json deleted file mode 100644 index 7af5c33c713403f327f9a06d73603cd04a6699a0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-8b-instruct/ed58570cd1d649dae5ec.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/b32d8c989faea596499f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/b32d8c989faea596499f.json deleted file mode 100644 index 3b606a90125b9072624ea9fd239670ecbb10f2bd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/b32d8c989faea596499f.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/545e10a34fdb4ffe2502.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/545e10a34fdb4ffe2502.json deleted file mode 100644 index b14cfd868fd2bbf330a60499a21f14bfe3221c00..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/545e10a34fdb4ffe2502.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5a0f8282a8edbb733db7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5a0f8282a8edbb733db7.json deleted file mode 100644 index e969e8ce4ad6cff8e2d888310313d510bb014bae..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5a0f8282a8edbb733db7.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7b432da1228c3df638ad.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7b432da1228c3df638ad.json deleted file mode 100644 index 2aa796356e5fa81f3e233319e4a9a086e72acb8b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7b432da1228c3df638ad.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7c7c62e78c4647129345.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7c7c62e78c4647129345.json deleted file mode 100644 index a008040b1b0d0d232843b16df28f4578fd1dbc33..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/7c7c62e78c4647129345.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8f82332391897f164fe3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8f82332391897f164fe3.json deleted file mode 100644 index a4fd1516ebafa088f69f9c31bf1ca1b419ebafe8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/8f82332391897f164fe3.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/9c982616331f7dcb0e23.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/9c982616331f7dcb0e23.json deleted file mode 100644 index 24ed0ba8b644fd80bcc540a06b4c8383e82a3706..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/9c982616331f7dcb0e23.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/bda582e76967ae0cd8d8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/bda582e76967ae0cd8d8.json deleted file mode 100644 index 9889cb2aac84edf13cef9a7222120c39b8903cf4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/bda582e76967ae0cd8d8.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/lmsys/vicuna-7b-v1.5/2a6103aac13b823fb0d2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/lmsys/vicuna-7b-v1.5/2a6103aac13b823fb0d2.json deleted file mode 100644 index 1d667e03c2420be857834969d8ad32e968ee6f4e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/lmsys/vicuna-7b-v1.5/2a6103aac13b823fb0d2.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/lmsys/vicuna-7b-v1.5/ff2e6dda658436a52641.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/lmsys/vicuna-7b-v1.5/ff2e6dda658436a52641.json deleted file mode 100644 index 97a76007b1f6c134b5f470bba2f7a5ec16e10e9d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/lmsys/vicuna-7b-v1.5/ff2e6dda658436a52641.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-2-13b-hf/2bc6e477cf5bcc9fd5d2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-2-13b-hf/2bc6e477cf5bcc9fd5d2.json deleted file mode 100644 index 35b5a6ff827259185c17b66a84795dfcb79102d6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-2-13b-hf/2bc6e477cf5bcc9fd5d2.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-2-7b-hf/1a37be3422e0b5e4e92d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-2-7b-hf/1a37be3422e0b5e4e92d.json deleted file mode 100644 index c74f2fae55318c2c89dfbaabb1d7ee111721d600..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-2-7b-hf/1a37be3422e0b5e4e92d.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.1-70B-Instruct/10305e5dc9e12edc1ed9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.1-70B-Instruct/10305e5dc9e12edc1ed9.json deleted file mode 100644 index 8f4e6f1355788d5025ee953ec901ea7d5d3f704f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.1-70B-Instruct/10305e5dc9e12edc1ed9.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.1-70B-Instruct/5346ca06410ee404cfde.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.1-70B-Instruct/5346ca06410ee404cfde.json deleted file mode 100644 index bfc8fdfb7cf4eca745f140359f85437c71834061..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.1-70B-Instruct/5346ca06410ee404cfde.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.2-1B/68e89dceda6db51d67c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.2-1B/68e89dceda6db51d67c1.json deleted file mode 100644 index 547e8dd22ae478d6cef3909c02113e7ede95d031..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.2-1B/68e89dceda6db51d67c1.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.2-3B/57484b1a33a78ada8c0c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.2-3B/57484b1a33a78ada8c0c.json deleted file mode 100644 index 5a23958e209a0074dcf136b65607c3f0e0c6413a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Llama-3.2-3B/57484b1a33a78ada8c0c.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3-8B/230a76339de35b8f1e11.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3-8B/230a76339de35b8f1e11.json deleted file mode 100644 index e97e20a25e7b1f1f3aadaa202cbc9c8a9bee6e3e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3-8B/230a76339de35b8f1e11.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/17606b6a2bf50014ed6c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/17606b6a2bf50014ed6c.json deleted file mode 100644 index 978a8ef526939b65cc656381dfe08df6f2343510..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/17606b6a2bf50014ed6c.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/3709f796c6fb7e1c8e7a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/3709f796c6fb7e1c8e7a.json deleted file mode 100644 index de77777c1a9a3103cf8abe3288b50f9ccb80731c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/3709f796c6fb7e1c8e7a.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/548828e81cea70c086b9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/548828e81cea70c086b9.json deleted file mode 100644 index 02b3ab10b2f48e095f4baf8bd173cf87f247dfd0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/548828e81cea70c086b9.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/5ba32e0f8a04a187c83a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/5ba32e0f8a04a187c83a.json deleted file mode 100644 index 49af3ebbd30940af14cb3894faccce98fe13742f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/5ba32e0f8a04a187c83a.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/9a1b36d6e18e9b138253.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/9a1b36d6e18e9b138253.json deleted file mode 100644 index 9fab7c7b8aa322d1d56daf01fd21974992523c90..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/9a1b36d6e18e9b138253.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/9a90a0525aaf4bdf9599.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/9a90a0525aaf4bdf9599.json deleted file mode 100644 index e5c1fcf0677d57db218ef7cdfc6082508814d42a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/9a90a0525aaf4bdf9599.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/f21dae678aa88a8ad625.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/f21dae678aa88a8ad625.json deleted file mode 100644 index 0dec4400f3c1fcc426c0311a1c5c1043cf433288..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/meta-llama/Meta-Llama-3.1-8B/f21dae678aa88a8ad625.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/princeton-nlp/Sheared-LLaMA-1.3B/6351d3ce3a699ad3537c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/princeton-nlp/Sheared-LLaMA-1.3B/6351d3ce3a699ad3537c.json deleted file mode 100644 index 534bc3a6de223b4ee67ba4f39cb0c02ab4e2f670..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/princeton-nlp/Sheared-LLaMA-1.3B/6351d3ce3a699ad3537c.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/princeton-nlp/Sheared-LLaMA-1.3B/6484f9822633c16921a4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/princeton-nlp/Sheared-LLaMA-1.3B/6484f9822633c16921a4.json deleted file mode 100644 index 265290c10fb6c0316e4e1f82f67206c474289ce0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/llama/princeton-nlp/Sheared-LLaMA-1.3B/6484f9822633c16921a4.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/Phi-3-mini-4k-instruct/8e968e905a9e8c65d3f0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/Phi-3-mini-4k-instruct/8e968e905a9e8c65d3f0.json deleted file mode 100644 index cb7f36548270c050fd74ce14daeec073a8e9c563..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/Phi-3-mini-4k-instruct/8e968e905a9e8c65d3f0.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/Phi-3-mini-4k-instruct/ce1578abca2185bff056.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/Phi-3-mini-4k-instruct/ce1578abca2185bff056.json deleted file mode 100644 index dfe2b788f296445f1d994115d44ce4fc5013c96f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/Phi-3-mini-4k-instruct/ce1578abca2185bff056.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/phi-4/029462585188fcbfe321.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/phi-4/029462585188fcbfe321.json deleted file mode 100644 index 5b39f6d1984dbee3f297c3d020d5ba37783b8830..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/phi-4/029462585188fcbfe321.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/phi-4/c07fd8c4328c08def00e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/phi-4/c07fd8c4328c08def00e.json deleted file mode 100644 index 2832cfa6c1ff455b2987d3eb9919e0774a392868..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/phi3/microsoft/phi-4/c07fd8c4328c08def00e.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-0.5B/850491ac77d96a0cc801.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-0.5B/850491ac77d96a0cc801.json deleted file mode 100644 index ffd5f0e753dce13a1e9a85a0538d57a1bd8adca3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-0.5B/850491ac77d96a0cc801.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-0.5B/aa5bdb4476b4a5688465.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-0.5B/aa5bdb4476b4a5688465.json deleted file mode 100644 index 30a1419adcdf92a662903bfcbb61a0918b0a248d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-0.5B/aa5bdb4476b4a5688465.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-1.5B/09a6cbb875483e0aee5d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-1.5B/09a6cbb875483e0aee5d.json deleted file mode 100644 index 74b9803794b6c638c584da96b043845ccd518cd6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-1.5B/09a6cbb875483e0aee5d.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-1.5B/dac42d34f60f763cde26.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-1.5B/dac42d34f60f763cde26.json deleted file mode 100644 index 9caf65de2541bc7d25cfe4925d35081377378ced..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-1.5B/dac42d34f60f763cde26.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-14B/4d87d57f81315833958c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-14B/4d87d57f81315833958c.json deleted file mode 100644 index 46e7f2fe2655c6350c6df61ca0ce846ba4ac58f1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-14B/4d87d57f81315833958c.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-14B/f583234f67e889e71787.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-14B/f583234f67e889e71787.json deleted file mode 100644 index fde98174d26919fa78106ddc391bca7798d45815..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-14B/f583234f67e889e71787.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-32B-Instruct/57ca9b21fef724c4903b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-32B-Instruct/57ca9b21fef724c4903b.json deleted file mode 100644 index e95f6745dca34b3772adacc8f4cff2f98796151d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-32B-Instruct/57ca9b21fef724c4903b.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-32B-Instruct/708dfea55efdf1bccd97.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-32B-Instruct/708dfea55efdf1bccd97.json deleted file mode 100644 index 7f37c6063031f3a2fad6840fb1b9d98b5f582cf7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-32B-Instruct/708dfea55efdf1bccd97.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-72B-Instruct/8166eeb90571939625d5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-72B-Instruct/8166eeb90571939625d5.json deleted file mode 100644 index 12256cb8982eaf03f1cf1c6b74f13302d3fae6b8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-72B-Instruct/8166eeb90571939625d5.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/1d20083dc22321b174eb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/1d20083dc22321b174eb.json deleted file mode 100644 index 64d28979dfbe9297faf58898f94f539cc561e020..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/1d20083dc22321b174eb.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/570a2abd2b1d5f39d7ac.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/570a2abd2b1d5f39d7ac.json deleted file mode 100644 index 6362e1a9f4bfa968b1be5942e5a8b26978335437..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/570a2abd2b1d5f39d7ac.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/93d0bd381df44d974a5f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/93d0bd381df44d974a5f.json deleted file mode 100644 index cedfd37dcbe7f3230adae3b3a811a4230c2f158f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/93d0bd381df44d974a5f.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/b6925cd4f85b2565f517.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/b6925cd4f85b2565f517.json deleted file mode 100644 index a737957c52c4bd87f35ea6b26e3fc78ff861c0e1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/b6925cd4f85b2565f517.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/c332d162c2a9aab418e7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/c332d162c2a9aab418e7.json deleted file mode 100644 index 90e69bb15c4fae72918e4b0198bfde0b7545b829..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-7B-Instruct/c332d162c2a9aab418e7.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/18e83fa01cd385ba4ac8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/18e83fa01cd385ba4ac8.json deleted file mode 100644 index 1a6480f58eeba6927a194b40d946d0472c10c692..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/18e83fa01cd385ba4ac8.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/e3990e4fdada694c42e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/e3990e4fdada694c42e8.json deleted file mode 100644 index 4290ea29677fb2c739a0f22cbfde086a67d9ffc8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/e3990e4fdada694c42e8.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/14fc7d2b1e59c91e59a0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/14fc7d2b1e59c91e59a0.json deleted file mode 100644 index 8ea330bfaa6e8caae8c13e32cee65bdfbbdb069d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/14fc7d2b1e59c91e59a0.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/23c4e8c27439bbfdc46f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/23c4e8c27439bbfdc46f.json deleted file mode 100644 index aa32d2f98c37004fc1d3545fab0bcb0872e91149..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/23c4e8c27439bbfdc46f.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/12755c3944b2cdd9d40e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/12755c3944b2cdd9d40e.json deleted file mode 100644 index 32ce9defc65a2aba040263ba5d0a7549b6644330..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/12755c3944b2cdd9d40e.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/a3653c0ec02e4b30cae2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/a3653c0ec02e4b30cae2.json deleted file mode 100644 index 6f4f89b8398f2674fb544114a183b51c46de4813..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/a3653c0ec02e4b30cae2.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/024e25ec79797b08e34d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/024e25ec79797b08e34d.json deleted file mode 100644 index b0a5ea84f559514e6613f0d903bacf1c25fbc6e0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/024e25ec79797b08e34d.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/1334b6304e8b8354ba02.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/1334b6304e8b8354ba02.json deleted file mode 100644 index 9a98cf4de3bf6626c7a509059964e87b8724edce..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/1334b6304e8b8354ba02.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/43a26fb33b44d0bbb432.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/43a26fb33b44d0bbb432.json deleted file mode 100644 index fa2991df347e9b1869d5bd4239d4acf19d2d3b7a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/43a26fb33b44d0bbb432.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/788a91b99e0e6690ece0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/788a91b99e0e6690ece0.json deleted file mode 100644 index d997e77752881133771327782385412c200ab792..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/788a91b99e0e6690ece0.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c77092f29a70383ae1a9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c77092f29a70383ae1a9.json deleted file mode 100644 index fddb95070d1aa811423d3253768d547ae7829a4c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev3/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/c77092f29a70383ae1a9.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev3", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json deleted file mode 100644 index 3e167a4933e519119cab99364a00f802616e9ac2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/diffusion-transformer/Jingya/pixart_sigma_pipe_xl_2_512_ms/befe64f8447a5b02ca93.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "Jingya/pixart_sigma_pipe_xl_2_512_ms", - "_task": null, - "text_encoder": { - "architectures": [ - "T5EncoderModel" - ], - "classifier_dropout": 0.0, - "d_ff": 10240, - "d_kv": 64, - "d_model": 4096, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 120, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_decoder_layers": 24, - "num_heads": 64, - "num_layers": 24, - "output_past": true, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32128 - }, - "transformer": { - "_class_name": "PixArtTransformer2DModel", - "activation_fn": "gelu-approximate", - "attention_bias": true, - "attention_head_dim": 72, - "attention_type": "default", - "caption_channels": 4096, - "cross_attention_dim": 1152, - "double_self_attention": false, - "dropout": 0.0, - "in_channels": 4, - "interpolation_scale": 1, - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_encoder_hidden_size": 4096, - "static_height": 64, - "static_num_channels": 4, - "static_patch_size": 2, - "static_sequence_length": 120, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_elementwise_affine": false, - "norm_eps": 1e-06, - "norm_num_groups": 32, - "norm_type": "ada_norm_single", - "num_attention_heads": 16, - "num_embeds_ada_norm": 1000, - "num_layers": 28, - "num_vector_embeds": null, - "only_cross_attention": false, - "out_channels": 8, - "patch_size": 2, - "upcast_attention": false, - "use_additional_conditions": null, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json deleted file mode 100644 index e7c9160f8f01fd8dae0ecee77a5051710472b9ec..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/aecf63194b748979aee7.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "PixArt-alpha/PixArt-XL-2-512x512", - "_task": null, - "text_encoder": { - "architectures": [ - "T5EncoderModel" - ], - "classifier_dropout": 0.0, - "d_ff": 10240, - "d_kv": 64, - "d_model": 4096, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 120, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_decoder_layers": 24, - "num_heads": 64, - "num_layers": 24, - "output_past": true, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32128 - }, - "transformer": { - "_class_name": "PixArtTransformer2DModel", - "activation_fn": "gelu-approximate", - "attention_bias": true, - "attention_head_dim": 72, - "attention_type": "default", - "caption_channels": 4096, - "cross_attention_dim": 1152, - "double_self_attention": false, - "dropout": 0.0, - "in_channels": 4, - "interpolation_scale": null, - "neuron": { - "auto_cast": null, - "auto_cast_type": null, - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_encoder_hidden_size": 4096, - "static_height": 64, - "static_num_channels": 4, - "static_patch_size": 2, - "static_sequence_length": 120, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_elementwise_affine": false, - "norm_eps": 1e-06, - "norm_num_groups": 32, - "norm_type": "ada_norm_single", - "num_attention_heads": 16, - "num_embeds_ada_norm": 1000, - "num_layers": 28, - "num_vector_embeds": null, - "only_cross_attention": false, - "out_channels": 8, - "patch_size": 2, - "upcast_attention": false, - "use_additional_conditions": null, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/2b933adb496fcd442664.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/2b933adb496fcd442664.json deleted file mode 100644 index f159e2f891f01844f3abefdfd546a712393e8cd0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/2b933adb496fcd442664.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/3252ac1138f9d0d35829.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/3252ac1138f9d0d35829.json deleted file mode 100644 index 0c5febdf10436c8d7297ecd8a2fce5fb9f574b97..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/3252ac1138f9d0d35829.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/a3c21dd1baab177e18e4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/a3c21dd1baab177e18e4.json deleted file mode 100644 index 0d529c1c04b1d3c242ea09a73ac7851c635a80df..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/a3c21dd1baab177e18e4.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/c7927a430db3da5413ec.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/c7927a430db3da5413ec.json deleted file mode 100644 index 55b914b892523c263c53c29a0264797426655493..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/c7927a430db3da5413ec.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/d70a11e2f6bd160a594c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/d70a11e2f6bd160a594c.json deleted file mode 100644 index b11e02f6a564615b4f4e6d5ba0446b5738b53d84..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-2b-instruct/d70a11e2f6bd160a594c.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-2b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.015625, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "logits_scaling": 8.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", - "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 5000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/029fcc28c43ac4e4e764.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/029fcc28c43ac4e4e764.json deleted file mode 100644 index a61b217e13b52799c3aeb6b3f977c49fe73f0af8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/029fcc28c43ac4e4e764.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/4e8865ab8f28d4fef3d1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/4e8865ab8f28d4fef3d1.json deleted file mode 100644 index 01d91be657e4de369c031eb34a94576bd640f7f1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/4e8865ab8f28d4fef3d1.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/63a4430604a70c4ed34b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/63a4430604a70c4ed34b.json deleted file mode 100644 index 1c621e6eb2bb1fb39033ad17d8db0b56d9372cdf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/63a4430604a70c4ed34b.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/c33d487b158322d41370.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/c33d487b158322d41370.json deleted file mode 100644 index 550d588f3715e0dd2aa40ab06efc179c09ea064f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/c33d487b158322d41370.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/d8020b9917c782f35c75.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/d8020b9917c782f35c75.json deleted file mode 100644 index bd8e26bb086f1824e14ac972aa7293151622e4d0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/ibm-granite/granite-3.1-8b-instruct/d8020b9917c782f35c75.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "ibm-granite/granite-3.1-8b-instruct", - "_task": "text-generation", - "architectures": [ - "GraniteForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.1, - "attention_multiplier": 0.0078125, - "embedding_multiplier": 12.0, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 12800, - "logits_scaling": 16.0, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "granite", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", - "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 40, - "num_key_value_heads": 8, - "residual_multiplier": 0.22, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 49155 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/5b6c65464175affe9b72.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/5b6c65464175affe9b72.json deleted file mode 100644 index 9f99db8b6b8dcf9aa0b7a913d8abd909ce664b2e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/5b6c65464175affe9b72.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5632, - "max_position_embeddings": 2048, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "checkpoint_revision": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 22, - "num_key_value_heads": 4, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/030649c091b19610cd4c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/030649c091b19610cd4c.json deleted file mode 100644 index dd9f778bb115b26b2b093cce386f6ef1d7dcfe93..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/030649c091b19610cd4c.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/287dccf6dbbe7a4267d0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/287dccf6dbbe7a4267d0.json deleted file mode 100644 index 428e390c80cfb5635d092e45b269cf2d46f24aa6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/287dccf6dbbe7a4267d0.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/38e4f1936394510b2255.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/38e4f1936394510b2255.json deleted file mode 100644 index 9ad0c196ad42ce3ea68052246eb9384caf7a4eb5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/38e4f1936394510b2255.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/420915e2856354d19181.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/420915e2856354d19181.json deleted file mode 100644 index ff475cd42c54abd57c41e4561129c711ff84080b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/420915e2856354d19181.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c3e748d977aa7d980b53.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c3e748d977aa7d980b53.json deleted file mode 100644 index 9908577ff36367ae67707be2955f01b9c453aba0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c3e748d977aa7d980b53.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/de30efb968e329f288ea.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/de30efb968e329f288ea.json deleted file mode 100644 index f549115643b7975605521d18f9ae198dc9fbc7d0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/de30efb968e329f288ea.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e2d0b8067f21edf8dd3f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e2d0b8067f21edf8dd3f.json deleted file mode 100644 index 2730c953d2177ec7a66ba7a5bde0cb445428c2ff..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e2d0b8067f21edf8dd3f.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/lmsys/vicuna-7b-v1.5/35104a0bc1895c0587a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/lmsys/vicuna-7b-v1.5/35104a0bc1895c0587a6.json deleted file mode 100644 index 9d10c8e6044229338048fc2afa9da61929b7f17b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/lmsys/vicuna-7b-v1.5/35104a0bc1895c0587a6.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/lmsys/vicuna-7b-v1.5/eeadd6a0f92923de8be9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/lmsys/vicuna-7b-v1.5/eeadd6a0f92923de8be9.json deleted file mode 100644 index 68d50f87a94991c55c7f9cd49cf9a4c5cdd31964..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/lmsys/vicuna-7b-v1.5/eeadd6a0f92923de8be9.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "lmsys/vicuna-7b-v1.5", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "lmsys/vicuna-7b-v1.5", - "checkpoint_revision": "3321f76e3f527bd14065daf69dad9344000a201d", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-2-13b-hf/90fac6c5fc58375c2361.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-2-13b-hf/90fac6c5fc58375c2361.json deleted file mode 100644 index 09da222da18d6c3c1d9e144f5fa4e6925dca513f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-2-13b-hf/90fac6c5fc58375c2361.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-13b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-13b-hf", - "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 40, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-2-7b-hf/807f841e3a79e1f3d26f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-2-7b-hf/807f841e3a79e1f3d26f.json deleted file mode 100644 index 30a28498ce90874236b6e95f92bea941e2e7ff29..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-2-7b-hf/807f841e3a79e1f3d26f.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-2-7b-hf", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 11008, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-2-7b-hf", - "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 2048, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 2048, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 2048, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/1b365dbb2e49d6a1ff45.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/1b365dbb2e49d6a1ff45.json deleted file mode 100644 index 997ef41f2e94ed8919856b3b219111982bc9e25e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/1b365dbb2e49d6a1ff45.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/5b9a60e8c7ca2619f287.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/5b9a60e8c7ca2619f287.json deleted file mode 100644 index 9ead1e0be4d814e79ff098c84bba6a797af58e4d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.1-70B-Instruct/5b9a60e8c7ca2619f287.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.1-70B-Instruct", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 28672, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct", - "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.2-1B/38b9c2e8a075d367af42.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.2-1B/38b9c2e8a075d367af42.json deleted file mode 100644 index ff7376ebdc48770b33aadbb713014d7be36272e5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.2-1B/38b9c2e8a075d367af42.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-1B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-1B", - "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 16, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.2-3B/bb567ff38204f1bf9ddb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.2-3B/bb567ff38204f1bf9ddb.json deleted file mode 100644 index c083e65de643959e1d8b5cd950cd023aaf8b186a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Llama-3.2-3B/bb567ff38204f1bf9ddb.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Llama-3.2-3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Llama-3.2-3B", - "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 24, - "num_hidden_layers": 28, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 32.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": true, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3-8B/5c4a351410776591c7a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3-8B/5c4a351410776591c7a8.json deleted file mode 100644 index 51b60bbb2486b0308fd681f2ef84f9adfaadf2c4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3-8B/5c4a351410776591c7a8.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 8192, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3-8B", - "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/0f897345ea4e470463fe.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/0f897345ea4e470463fe.json deleted file mode 100644 index a589a3a5e27dcb51f12f50fef9f7ee73868a5e7c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/0f897345ea4e470463fe.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/14fabc49f825c58b7427.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/14fabc49f825c58b7427.json deleted file mode 100644 index ba08485bcd93e5d298503bbced294dc959fd01ba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/14fabc49f825c58b7427.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/461bdf40f0dad163efca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/461bdf40f0dad163efca.json deleted file mode 100644 index 0f932c136056e66327dd684ccd959788f20046ef..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/461bdf40f0dad163efca.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/88757947c34396c9f9a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/88757947c34396c9f9a8.json deleted file mode 100644 index 80ede440eb0121327879040040b4b2f58c2fa64b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/88757947c34396c9f9a8.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/8ce203c7dff73fd758e7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/8ce203c7dff73fd758e7.json deleted file mode 100644 index 0951fd2061c4b82ff945fdb8666e2fb4978216fa..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/8ce203c7dff73fd758e7.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/b55e9b3358f73bfc8e76.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/b55e9b3358f73bfc8e76.json deleted file mode 100644 index d68a3920cb2ce9d4ac2babf877a3b0d5934008ba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/b55e9b3358f73bfc8e76.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/ea2142b0858eee139b4a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/ea2142b0858eee139b4a.json deleted file mode 100644 index 1bca89871b99fd3143127706896976a5ae87d73a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/ea2142b0858eee139b4a.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "meta-llama/Meta-Llama-3.1-8B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", - "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 128256 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/395a6f32985d1a6fe6bd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/395a6f32985d1a6fe6bd.json deleted file mode 100644 index 26b67e0224023e34d1a9137c457dc6f4b66ebae3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/395a6f32985d1a6fe6bd.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/695c2b875d0451eabfcb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/695c2b875d0451eabfcb.json deleted file mode 100644 index 9cce1cc901d87c42dc6f399d0e9b3583cdc0bded..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/princeton-nlp/Sheared-LLaMA-1.3B/695c2b875d0451eabfcb.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "_task": "text-generation", - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 2048, - "initializer_range": 0.02, - "intermediate_size": 5504, - "max_position_embeddings": 4096, - "mlp_bias": false, - "model_type": "llama", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", - "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "float16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "float16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32000 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/32cf3abd57ca0217118c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/32cf3abd57ca0217118c.json deleted file mode 100644 index 4bcefd446004100329008893dd5c3468e30e4a56..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/32cf3abd57ca0217118c.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/6be2052b6d2e21b69bde.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/6be2052b6d2e21b69bde.json deleted file mode 100644 index 5b5e763d0cfb3d5838c4da3ca920d4d9c12a4f57..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/Phi-3-mini-4k-instruct/6be2052b6d2e21b69bde.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/Phi-3-mini-4k-instruct", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 3072, - "initializer_range": 0.02, - "intermediate_size": 8192, - "max_position_embeddings": 4096, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", - "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "original_max_position_embeddings": 4096, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "sliding_window": 2047, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 32064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/phi-4/093c2d852aeadedab09c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/phi-4/093c2d852aeadedab09c.json deleted file mode 100644 index f87a72dee749e44007e9c476382b7b603304e575..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/phi-4/093c2d852aeadedab09c.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/phi-4/1d133e17e9c02a89acef.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/phi-4/1d133e17e9c02a89acef.json deleted file mode 100644 index 3230008f1ef856457408801c3f078b3cbe05eb19..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/microsoft/phi-4/1d133e17e9c02a89acef.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "microsoft/phi-4", - "_task": "text-generation", - "architectures": [ - "Phi3ForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "embd_pdrop": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 17920, - "max_position_embeddings": 16384, - "model_type": "phi3", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "microsoft/phi-4", - "checkpoint_revision": "187ef0342fff0eb3333be9f00389385e95ef0b61", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": true, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 10, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 10, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 40, - "num_key_value_heads": 10, - "original_max_position_embeddings": 16384, - "partial_rotary_factor": 1.0, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 250000, - "sliding_window": null, - "tie_word_embeddings": false, - "use_cache": true, - "vocab_size": 100352 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/23118127a1cf58797100.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/23118127a1cf58797100.json deleted file mode 100644 index 67deb0e9206f635e75106fc47e69d0e680603c25..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/23118127a1cf58797100.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/cc411d7a2728d29949b1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/cc411d7a2728d29949b1.json deleted file mode 100644 index 9f4b9f710568bf9c663446d9dae29f5a24771cc5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-0.5B/cc411d7a2728d29949b1.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-0.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 896, - "initializer_range": 0.02, - "intermediate_size": 4864, - "max_position_embeddings": 32768, - "max_window_layers": 24, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-0.5B", - "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 14, - "num_hidden_layers": 24, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 32768, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/753218fc7d1385c6a8e0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/753218fc7d1385c6a8e0.json deleted file mode 100644 index f484738b0e26075949cb6efdfafe6b8c933b5b32..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/753218fc7d1385c6a8e0.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/a48cc7f828d1f0c4f7b7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/a48cc7f828d1f0c4f7b7.json deleted file mode 100644 index d500c45ca7fe50c28285d66541c39a3d9c83b1d4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-1.5B/a48cc7f828d1f0c4f7b7.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-1.5B", - "checkpoint_revision": "8faed761d45a263340a0528343f099c05c9a4323", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": true, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-14B/2d16ce8f83cc6b99e6ae.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-14B/2d16ce8f83cc6b99e6ae.json deleted file mode 100644 index aadd4a4c6d8a20fd070ad7d238e2f53984dd2e32..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-14B/2d16ce8f83cc6b99e6ae.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-14B/3bbb02e203ab4b85ff92.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-14B/3bbb02e203ab4b85ff92.json deleted file mode 100644 index 189511c808029029b478c7a1683cea0bfcf2f2ad..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-14B/3bbb02e203ab4b85ff92.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-14B", - "checkpoint_revision": "97e1e76335b7017d8f67c08a19d103c0504298c9", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/0b385125ac34d33bc8a3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/0b385125ac34d33bc8a3.json deleted file mode 100644 index e57ce2bed1afd87701af789c00f4ba9aeb4810ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/0b385125ac34d33bc8a3.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/3c6c86fb7476c0a1e7fe.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/3c6c86fb7476c0a1e7fe.json deleted file mode 100644 index 2e18a93cd05c629dc040c228e326f04b26709473..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-32B-Instruct/3c6c86fb7476c0a1e7fe.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-32B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-32B-Instruct", - "checkpoint_revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-72B-Instruct/b3f383fe056aaa94ed6b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-72B-Instruct/b3f383fe056aaa94ed6b.json deleted file mode 100644 index fd5297f3ef063eb2b2bb231d76c8476135fdd92c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-72B-Instruct/b3f383fe056aaa94ed6b.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-72B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 8192, - "initializer_range": 0.02, - "intermediate_size": 29568, - "max_position_embeddings": 32768, - "max_window_layers": 70, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-72B-Instruct", - "checkpoint_revision": "495f39366efef23836d0cfae4fbe635880d2be31", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 24, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 24, - "vocab_parallel": false - }, - "num_attention_heads": 64, - "num_hidden_layers": 80, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/150964fcaec325a8a498.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/150964fcaec325a8a498.json deleted file mode 100644 index 4ec867b57d0107a0e82242d887170f191c7a8707..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/150964fcaec325a8a498.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/18d7461c59deedd153d8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/18d7461c59deedd153d8.json deleted file mode 100644 index 49264bc127eb870f48f4b6a89bbdba5a46729bfd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/18d7461c59deedd153d8.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/2895fb605ca87dd37c35.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/2895fb605ca87dd37c35.json deleted file mode 100644 index e6f426cde10cc177dbbda1ca09789f450f1ebd69..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/2895fb605ca87dd37c35.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/415b1c1be341ccdf0ff0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/415b1c1be341ccdf0ff0.json deleted file mode 100644 index 3d4f24c9b35737b1cac015f4ef823b875455ba1d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/415b1c1be341ccdf0ff0.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/4b500fcdc1055f9741f9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/4b500fcdc1055f9741f9.json deleted file mode 100644 index 2ae853b30b0662b7c41e4422fdf3736a02b1b279..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/Qwen/Qwen2.5-7B-Instruct/4b500fcdc1055f9741f9.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "Qwen/Qwen2.5-7B-Instruct", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 32768, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "Qwen/Qwen2.5-7B-Instruct", - "checkpoint_revision": "a09a35458c702b33eeacc393d103063234e8bc28", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/6edf6a25756d9176c212.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/6edf6a25756d9176c212.json deleted file mode 100644 index 93db3bba304c383f0ceaf53a0d4672727ab5c655..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/6edf6a25756d9176c212.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/e32a3fcc211bb5b4a3ef.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/e32a3fcc211bb5b4a3ef.json deleted file mode 100644 index 5cb757bf704d27ec5439fc760552690a312a32d4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/e32a3fcc211bb5b4a3ef.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 1536, - "initializer_range": 0.02, - "intermediate_size": 8960, - "max_position_embeddings": 131072, - "max_window_layers": 21, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "checkpoint_revision": "ad9f0ae0864d7fbcd1cd905e3c6c5b069cc8b562", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 12, - "num_hidden_layers": 28, - "num_key_value_heads": 2, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 151936 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/0d42b7381bf1c17d4852.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/0d42b7381bf1c17d4852.json deleted file mode 100644 index 4d94170de29304c5f6894078fe5a5c6c5b104e92..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/0d42b7381bf1c17d4852.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/5d4bb2ae4cb05e019477.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/5d4bb2ae4cb05e019477.json deleted file mode 100644 index e3cd352d2664d5d26687921cb2f61c72fda71061..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/5d4bb2ae4cb05e019477.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 13824, - "max_position_embeddings": 131072, - "max_window_layers": 48, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 16, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "checkpoint_revision": "1df8507178afcc1bef68cd8c393f61a886323761", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 16, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 48, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/493cfa6e461fb99f9ad9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/493cfa6e461fb99f9ad9.json deleted file mode 100644 index 7ff72fd57ca2871e5a13013fc935ba8bde7d92dd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/493cfa6e461fb99f9ad9.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/588d7a4bf1d2f6e2a90d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/588d7a4bf1d2f6e2a90d.json deleted file mode 100644 index 316472fcf082c05e2f468df9bd9d18d57f4843a7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/588d7a4bf1d2f6e2a90d.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 5120, - "initializer_range": 0.02, - "intermediate_size": 27648, - "max_position_embeddings": 131072, - "max_window_layers": 64, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "checkpoint_revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 40, - "num_hidden_layers": 64, - "num_key_value_heads": 8, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 1000000.0, - "sliding_window": 131072, - "tie_word_embeddings": false, - "use_cache": true, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/132182715ae362cfca28.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/132182715ae362cfca28.json deleted file mode 100644 index b63a58ae510d4c63348c78c7f608d66c1af9894f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/132182715ae362cfca28.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 32, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 32, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4eb9a0d0db5c7b6d4ec5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4eb9a0d0db5c7b6d4ec5.json deleted file mode 100644 index 2b744820ad1a60d737ea73e369182988acd7fd1f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4eb9a0d0db5c7b6d4ec5.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/996196eddd2c4391650b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/996196eddd2c4391650b.json deleted file mode 100644 index e17b14761c5b7c566724bc1bfde2383001e50025..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/996196eddd2c4391650b.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 1, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": false, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 8, - "logical_nc_config": 1, - "max_batch_size": 1, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": true, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 8, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a6d58197d6fe827b2472.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a6d58197d6fe827b2472.json deleted file mode 100644 index 3eb05f188c261f22b5a7266fd3536957803309cb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/a6d58197d6fe827b2472.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 8, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 8, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/d1044f19045142c9c8a4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/d1044f19045142c9c8a4.json deleted file mode 100644 index 30554ce2f3d09510feb821dc350df425d9cda4c7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/d1044f19045142c9c8a4.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "_entry_class": "SingleModelCacheEntry", - "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "_task": "text-generation", - "architectures": [ - "Qwen2ForCausalLM" - ], - "attention_dropout": 0.0, - "hidden_act": "silu", - "hidden_size": 3584, - "initializer_range": 0.02, - "intermediate_size": 18944, - "max_position_embeddings": 131072, - "max_window_layers": 28, - "model_type": "qwen2", - "neuron": { - "_serialized_key": "NxDNeuronConfig", - "async_mode": false, - "attn_kernel_enabled": false, - "batch_size": 4, - "capacity_factor": null, - "cc_pipeline_tiling_factor": 2, - "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "checkpoint_revision": "916b56a44061fd5cd7d6a8fb632557ed4f724f60", - "continuous_batching": true, - "enable_bucketing": false, - "ep_degree": 1, - "flash_decoding_enabled": false, - "fused_qkv": false, - "glu_mlp": true, - "is_chunked_prefill": false, - "local_ranks_size": 2, - "logical_nc_config": 1, - "max_batch_size": 4, - "max_context_length": 4096, - "max_topk": 256, - "mlp_kernel_enabled": false, - "mlp_kernel_fuse_residual_add": false, - "n_active_tokens": 4096, - "neuronxcc_version": "2.17.194.0+d312836f", - "num_cores_per_group": 1, - "on_device_sampling": false, - "optimum_neuron_version": "0.3.0.dev4", - "output_logits": false, - "padding_side": "right", - "pp_degree": 1, - "qk_layernorm": false, - "qkv_kernel_enabled": false, - "rpl_reduce_dtype": "bfloat16", - "sequence_length": 4096, - "sequence_parallel_enabled": false, - "speculation_length": 0, - "start_rank_id": 0, - "target": null, - "torch_dtype": "bfloat16", - "tp_degree": 2, - "vocab_parallel": false - }, - "num_attention_heads": 28, - "num_hidden_layers": 28, - "num_key_value_heads": 4, - "rms_norm_eps": 1e-06, - "rope_scaling": null, - "rope_theta": 10000, - "sliding_window": 4096, - "tie_word_embeddings": false, - "use_cache": true, - "use_mrope": false, - "use_sliding_window": false, - "vocab_size": 152064 -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json deleted file mode 100644 index a31aae35589c29c4e68f007cc2e2403126a2f43b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/290d6364f00ca86a0a51.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json deleted file mode 100644 index da96dbb64fa025daef3187e2adcdb83885abfad2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/jyoung105/stable-diffusion-v1-5/b74e4b6342153be8fdd2.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "jyoung105/stable-diffusion-v1-5", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": 8, - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 768, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": false, - "use_linear_projection": false - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json deleted file mode 100644 index 66630fe558766028b5435553aa329a14e7a13241..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/sdxl-turbo/68031b89e85788c276aa.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json deleted file mode 100644 index 462022c563c8072be26f3101128e4ef4ef4267ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/sdxl-turbo/c4162c03a5bca6d7620e.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/sdxl-turbo", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json deleted file mode 100644 index ad95d479b1c151684b8bcac694ee19b37ea5cca5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/ee34e363190e7c6310a6.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 64, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 64, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json deleted file mode 100644 index 9c3fbb3b2f0ded30aa2aac828918dba7b28659b0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-2-1/f41992c9042c864c8f94.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-2-1", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1024, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 16, - "num_hidden_layers": 23, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": null, - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": null, - "attention_head_dim": [ - 5, - 10, - 20, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1024, - "cross_attention_norm": null, - "down_block_types": [ - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": null, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 1, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D" - ], - "upcast_attention": true, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json deleted file mode 100644 index a4972b5c9a0fb6be725dcaf6d03456d06c02d896..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/21150d7758de8fbb95c1.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json deleted file mode 100644 index cd55c34340ed6770489510adbdbd74e149c308bc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-base-1.0/a8218e1ae23f2aaf8834.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-base-1.0", - "_task": null, - "text_encoder": { - "architectures": [ - "CLIPTextModel" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "quick_gelu", - "hidden_size": 768, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 5, - 10, - 20 - ], - "attention_type": "default", - "block_out_channels": [ - 320, - 640, - 1280 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 2048, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2816, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": [ - 1, - 2, - 10 - ], - "up_block_types": [ - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json deleted file mode 100644 index 390dd6c309b9fec57082f09265f194bace6b82b2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/35bf7e5705bc85882085.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 96, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 96, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json deleted file mode 100644 index e6fe9f8a585e358882b746b47545f81451187af1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/stable-diffusion/stabilityai/stable-diffusion-xl-refiner-1.0/dbc7625ec3e1068324e8.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "_entry_class": "MultiModelCacheEntry", - "_model_id": "stabilityai/stable-diffusion-xl-refiner-1.0", - "_task": null, - "text_encoder_2": { - "architectures": [ - "CLIPTextModelWithProjection" - ], - "attention_dropout": 0.0, - "dropout": 0.0, - "hidden_act": "gelu", - "hidden_size": 1280, - "initializer_factor": 1.0, - "initializer_range": 0.02, - "intermediate_size": 5120, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 77, - "model_type": "clip_text_model", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": false, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_sequence_length": 77, - "task": "feature-extraction", - "tensor_parallel_size": 1 - }, - "num_attention_heads": 20, - "num_hidden_layers": 32, - "output_hidden_states": true, - "vocab_size": 49408 - }, - "unet": { - "_class_name": "UNet2DConditionModel", - "act_fn": "silu", - "addition_embed_type": "text_time", - "addition_embed_type_num_heads": 64, - "addition_time_embed_dim": 256, - "attention_head_dim": [ - 6, - 12, - 24, - 24 - ], - "attention_type": "default", - "block_out_channels": [ - 384, - 768, - 1536, - 1536 - ], - "center_input_sample": false, - "class_embed_type": null, - "class_embeddings_concat": false, - "conv_in_kernel": 3, - "conv_out_kernel": 3, - "cross_attention_dim": 1280, - "cross_attention_norm": null, - "down_block_types": [ - "DownBlock2D", - "CrossAttnDownBlock2D", - "CrossAttnDownBlock2D", - "DownBlock2D" - ], - "downsample_padding": 1, - "dropout": 0.0, - "dual_cross_attention": false, - "encoder_hid_dim": null, - "encoder_hid_dim_type": null, - "flip_sin_to_cos": true, - "freq_shift": 0, - "in_channels": 4, - "layers_per_block": 2, - "mid_block_only_cross_attention": null, - "mid_block_scale_factor": 1, - "mid_block_type": "UNetMidBlock2DCrossAttn", - "neuron": { - "auto_cast": "matmul", - "auto_cast_type": "bf16", - "compiler_type": "neuronx-cc", - "compiler_version": "2.17.194.0+d312836f", - "dynamic_batch_size": false, - "inline_weights_to_neff": true, - "optlevel": "2", - "output_attentions": false, - "output_hidden_states": false, - "static_batch_size": 1, - "static_height": 128, - "static_num_channels": 4, - "static_sequence_length": 77, - "static_vae_scale_factor": 8, - "static_width": 128, - "task": "semantic-segmentation", - "tensor_parallel_size": 1 - }, - "norm_eps": 1e-05, - "norm_num_groups": 32, - "num_attention_heads": null, - "num_class_embeds": null, - "only_cross_attention": false, - "out_channels": 4, - "projection_class_embeddings_input_dim": 2560, - "resnet_out_scale_factor": 1.0, - "resnet_skip_time_act": false, - "resnet_time_scale_shift": "default", - "reverse_transformer_layers_per_block": null, - "time_cond_proj_dim": null, - "time_embedding_act_fn": null, - "time_embedding_dim": null, - "time_embedding_type": "positional", - "timestep_post_act": null, - "transformer_layers_per_block": 4, - "up_block_types": [ - "UpBlock2D", - "CrossAttnUpBlock2D", - "CrossAttnUpBlock2D", - "UpBlock2D" - ], - "upcast_attention": null, - "use_linear_projection": true - } -} \ No newline at end of file