diff --git a/.gitattributes b/.gitattributes index bd1ce75d55cb7108ea0ce21860905d3faddc666b..aa22120df15195a3d3894a1a7ab52df470b99fd7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -15032,3 +15032,19 @@ neuronxcc-2.21.33363.0+82129205/MODULE_d7f7c69ad6f63a27e1a5+a02c3a36/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_d7f7c69ad6f63a27e1a5+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_09e98045a01eb4a75c24+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_09e98045a01eb4a75c24+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/0f6c4fdc5392f85cc1a9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/0f6c4fdc5392f85cc1a9.json new file mode 100644 index 0000000000000000000000000000000000000000..b830d33c584d5aea28e7dc6294e5c8ddf53618b0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/0f6c4fdc5392f85cc1a9.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/776cfdcbfedab12abaef.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/776cfdcbfedab12abaef.json new file mode 100644 index 0000000000000000000000000000000000000000..7171868c43f26f7a64b8f624c6fc2dfe19abcda5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/776cfdcbfedab12abaef.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/52a3b7d021f51c90337f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/52a3b7d021f51c90337f.json new file mode 100644 index 0000000000000000000000000000000000000000..25dc89b9850622ce214252c5c202f13cc7990091 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/52a3b7d021f51c90337f.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/5fdba651620df09da93d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/5fdba651620df09da93d.json new file mode 100644 index 0000000000000000000000000000000000000000..de9077b6d9f87d01975e4bec75ee52ed988f84f9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/5fdba651620df09da93d.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/daa276345bb9b68e9be5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/daa276345bb9b68e9be5.json new file mode 100644 index 0000000000000000000000000000000000000000..0a26ed4319453dd8c2bbef2e758fc22415255e90 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/daa276345bb9b68e9be5.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/e4b573e1a33bbda76243.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/e4b573e1a33bbda76243.json new file mode 100644 index 0000000000000000000000000000000000000000..adb8195a4f2b3b60362a2b3cf977a1524504e4c4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/e4b573e1a33bbda76243.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.2-1B-Instruct/ae1848094edb282bfdf1.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.2-1B-Instruct/ae1848094edb282bfdf1.json new file mode 100644 index 0000000000000000000000000000000000000000..77d61bcfb25131fa6726bc2a2076d2ccb0296d8d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Llama-3.2-1B-Instruct/ae1848094edb282bfdf1.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/c0cef3ac9ffe05625f39.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/c0cef3ac9ffe05625f39.json new file mode 100644 index 0000000000000000000000000000000000000000..72a9b21b7fcf05cb5604bdf118efb17bfeb1d4a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/c0cef3ac9ffe05625f39.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/683e082396a4bd7ab4cb.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/683e082396a4bd7ab4cb.json new file mode 100644 index 0000000000000000000000000000000000000000..19b723b2e1c85b685156064a9631a856601258b9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/683e082396a4bd7ab4cb.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/79f88038b5962d921f3d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/79f88038b5962d921f3d.json new file mode 100644 index 0000000000000000000000000000000000000000..785a9e2a711815916abfb6b200c0c4b993ac91f4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/79f88038b5962d921f3d.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/8b175859f95b9fa5a1db.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/8b175859f95b9fa5a1db.json new file mode 100644 index 0000000000000000000000000000000000000000..519ce3d65d6de50d7a26f29e354c330f23f3bd79 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/8b175859f95b9fa5a1db.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/93fa413575d5ccc52f58.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/93fa413575d5ccc52f58.json new file mode 100644 index 0000000000000000000000000000000000000000..1bdb8ae36f525bb1d7152dc523bc2eadd2de1549 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/93fa413575d5ccc52f58.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/f453e7e94ffe46947fc0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/f453e7e94ffe46947fc0.json new file mode 100644 index 0000000000000000000000000000000000000000..8f2fe3703a549b8955a21cc2e54eb3a508f6406d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/f453e7e94ffe46947fc0.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/llama-3.2-1B-Instruct/c093ba8faccfd3d9f2c2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/llama-3.2-1B-Instruct/c093ba8faccfd3d9f2c2.json new file mode 100644 index 0000000000000000000000000000000000000000..c67f9544ca0b119477840bd1ff5a7cc1855b3a86 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/llama-3.2-1B-Instruct/c093ba8faccfd3d9f2c2.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/86c6ae54af67ddf4b3b8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/86c6ae54af67ddf4b3b8.json new file mode 100644 index 0000000000000000000000000000000000000000..20874db847486c5c5c502b480575f24444324c65 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/86c6ae54af67ddf4b3b8.json @@ -0,0 +1,165 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/bb3ccea738a2a8e75fe8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/bb3ccea738a2a8e75fe8.json new file mode 100644 index 0000000000000000000000000000000000000000..16da7d1cce4549f120e66756284a4ccade63c069 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/bb3ccea738a2a8e75fe8.json @@ -0,0 +1,165 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/53917a90164d81443a08.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/53917a90164d81443a08.json new file mode 100644 index 0000000000000000000000000000000000000000..d5f5b06b4b1607e9a1c7213d5a9c1c63276383d7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/53917a90164d81443a08.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/b558da325694139bbaaa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/b558da325694139bbaaa.json new file mode 100644 index 0000000000000000000000000000000000000000..aa529396bfbf431cb4b76123897a0150ca3a594c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/b558da325694139bbaaa.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/1b21de7b61f3d14bbbff.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/1b21de7b61f3d14bbbff.json new file mode 100644 index 0000000000000000000000000000000000000000..15450fbf77dab14801bd8707e93082ef875a2095 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/1b21de7b61f3d14bbbff.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/ad8740a6bddc8f6b87a7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/ad8740a6bddc8f6b87a7.json new file mode 100644 index 0000000000000000000000000000000000000000..ed61ff2eb808ab3770ee5944010f91345ac933a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/ad8740a6bddc8f6b87a7.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/0165b063d3dfd73ec9af.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/0165b063d3dfd73ec9af.json new file mode 100644 index 0000000000000000000000000000000000000000..50e84b7b95401b4b70d20d4e7775e5e8511a5c82 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/0165b063d3dfd73ec9af.json @@ -0,0 +1,135 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/71933d2c6e6099113f93.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/71933d2c6e6099113f93.json new file mode 100644 index 0000000000000000000000000000000000000000..7bc54bdacc72f7f8eb770c1722956ba8c1808960 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/71933d2c6e6099113f93.json @@ -0,0 +1,135 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5ff1cca947e85af8100f02a971940353805dacbd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60325dbe4e341d0836f3b68cedc6beaa300459af2c947445da4a545a3f703c10 +size 939272 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9cb4e7221016e53bbc4717c93186930ab43c066b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_110fc80e89006393f738+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e189bba6f572c67c0bd6e9bba05b58d4283f34c50588dd6aa97a34007311ba28 +size 6667264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..caa215de9ecc819b26539ea04f6d7b719b525bba --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d18c81c6ab7ffca4593ffd12280271d86ca9a2700a2770da93fb314a9109ff67 +size 509380 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..52e5de160ae6eb6f0bbcc4df34d8809b957ea935 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d13b5a6f47ee0e1c7c9b783470687dfcad53929b4035be0b071bbacd718948 +size 41585664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be719c4eee874db0bca312c82d2be934c3caba4e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ed4ca4ead0b4a889a31f35680291eb4368e838532e40337ccf0a72e56de3aa1 +size 914383 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5ff399909d9f0520b83d7ac0d957b9989dda8ffb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1aa5f4baa9354745d6a6+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a8ac13c543cfd26597bdef4bb0dc1c5fe6dafe9046db1c62b7d4e1fe1eb4bd +size 12606464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0b81853a05cdcc68ffd73e6a3e9d08e6e78a9bf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b254a5de285d4202654c3861698c1a55558bbae98505f678539066015a5d803a +size 474402 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4514fda120c3b85899add1e7948dbdd5a4edfa3e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_37d321becc90cb687039+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02b5479a35276997f52e0427e90139634e919e308bc9747f3ca820dec15d0382 +size 115057664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..74a51ce539ef7bf362be1fd8562d4d8c44949119 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bcd5dc785b88fb271b709071dcba90051a1052d0548e3eb0dfd4d88940f24d8 +size 1036312 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..450ecc8be80d5bbb411270715c328a3e681110e2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_39061f1efbca2332dc73+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e6e0d539f585138ff57fe8510d76ba99dc0f679c7886ffa295bb8396c1bd9f +size 15893504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0f9d614d6fbfc4fedae88878100cd8c56775eb34 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92413edeab8bdd1ff79c68aeeb0633faf78e28df1df24ce6e6685975d7156ff1 +size 426769 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b0c5bb7044ce4121848c7cce0cd1761f202131a0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c238600da6a768547204d176f9f7adf55db06bcaf6e1cc9a978896b92c42e21 +size 3073024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e507f94b047ecc7bc3a351e6f361a5c450ee27eb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5cfe268b844f7d2286a5+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54e033c437c96db2869f8b8620b5b2eb24b6fad3e04fd4f0b417f6df1c3558ef +size 3147125 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cc31afd593bab43dcdb6fcf84b0b77140dc48669 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afbf1c2497a38503bf2c1c9685d3d551b5461b11c9eedd45100bee098d75959d +size 978844 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fa0e1b0c0391ef035fef648a0597fbf582e0a8d4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7c1ab0225123c184780f+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8741039102da2965de796bf40e6538a49f53e0cebf245a4a5d30530136794056 +size 6718464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e638b5c7cceb8e7260606f893839002ebcc6cdd1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a524a8593eaa11d4c64b9633ad252e08702bde8a7a43768a07e75baf9423c7cb +size 1049377 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e3bd3ef7a392bb9414cf9adef02bd9f42bd18f86 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_88399bbf2a34b1e28eee+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db85008650868e4f75f4a65d80144ea6b7e547796f3ec4fe4e3b666c0457657a +size 23000064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7d4ffbf8209549efc7988afdcab7f584f5903097 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2560de39994ba4784626257a4845c5ad7af11cf2ce79e7df7ac1e29ba15f33ec +size 434264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6a694d2da58b8a6f6c259b838b5084093ca8f877 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c2586694f84bb7527a209850d0dd49d48990faacd1d4e4c72602d5e32573172 +size 2366464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..44df764c07e874c668ad74d61b57ec1545660506 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b8221eb10f6b4eb2f68+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce92ab94bac6c24d2adf4d57bc2afbacfc4ec63abecfe9b3a06f395d6077f1ed +size 2440551 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb2212bee3cacbebd86a3a4b2b98d2e9ecb64f89 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c21388ebcbb2ad22728e2aad4f2f7eb57d3c8a04c59a81df97f9e39aa18f57b5 +size 507614 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..77e864ed0e4d66df411cb76423dd652185d8629e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0ebe7aa5e1ba33d7e2cb909b8df9ee8d25f1f1878a4dde47c74622a01bd3d37 +size 114013184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..26150d061de2289f7d9c44e931b05a75bb2d1b55 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8406378e6e18cc6ac3eda37f281d580f7b4bbec015dc27ac8369fc44b705526f +size 809145 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1ec6b53b83b27906e17be8e1914e4985afe90286 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a3a82e0d0626a80a2c5e6898d41e9ca22a72fea4b945f1555508c37787c263 +size 21863424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..94896c041d86842ff8fc1a2c3731c13ac84c8953 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cfd70138eb9722ac2255+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec03e8b7d7deca9b162d16f1d57b4954f12796e59c0cd1e9bd7edbca49eff15 +size 22011354 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c1bf707e116369c98d18265e77ad5c232115e427 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f057068b35f8aeb0d565f76649c32016184243a96809f860114d903a6770d7f +size 770051 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c2a4624ec46d0a3da5610fef4aa61252cdc1d794 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695538f706036d1034f9ea8fb7269338dba1a78a37fc131646f03c7496f1239f +size 21801984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bbc75ed359ab77dd0d8fd84ec0a78b27d2ae7821 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dca44e3cebd5178ce615300b84bd8ad2e980993e5fc672a7bb53b6576a4e981 +size 21949799