diff --git a/.gitattributes b/.gitattributes
index 3c55113956bb6b041ae6e07b068fb3c351936fe2..feec107cad1c05ee0a86e47a3a05985cac0cce07 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -12907,3 +12907,16 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_16155383408857099159+e30acd3a/model.neff
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1650383904864671264+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.18209.0+043b1bf7/MODULE_18248527042358605061+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7690071555322555841+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/model.neuron filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev7/wav2vec2/hf-internal-testing/tiny-random-Wav2Vec2Model/3fb52d732cf73210c52f.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev7/wav2vec2/hf-internal-testing/tiny-random-Wav2Vec2Model/3fb52d732cf73210c52f.json
new file mode 100644
index 0000000000000000000000000000000000000000..bc6443374f075d65eb05a03e0b7685b2af0ce139
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev7/wav2vec2/hf-internal-testing/tiny-random-Wav2Vec2Model/3fb52d732cf73210c52f.json
@@ -0,0 +1,113 @@
+{
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "hf-internal-testing/tiny-random-Wav2Vec2Model",
+ "_task": "audio-xvector",
+ "activation_dropout": 0.1,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2Model"
+ ],
+ "attention_dropout": 0.1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": false,
+ "conv_dim": [
+ 32,
+ 32,
+ 32
+ ],
+ "conv_kernel": [
+ 8,
+ 8,
+ 8
+ ],
+ "conv_stride": [
+ 4,
+ 4,
+ 4
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": false,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.0,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.1,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 16,
+ "initializer_range": 0.02,
+ "intermediate_size": 20,
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.1,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 2,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.5,
+ "model_type": "wav2vec2",
+ "neuron": {
+ "auto_cast": null,
+ "auto_cast_type": null,
+ "compiler_type": "neuronx-cc",
+ "compiler_version": "2.21.18209.0+043b1bf7",
+ "cpu_backend": false,
+ "disable_fallback": false,
+ "disable_fast_relayout": false,
+ "dynamic_batch_size": true,
+ "float_dtype": "fp32",
+ "inline_weights_to_neff": true,
+ "input_names": [
+ "input_values"
+ ],
+ "int_dtype": "int64",
+ "model_type": "wav2vec2",
+ "optlevel": "2",
+ "output_attentions": false,
+ "output_hidden_states": false,
+ "output_names": [
+ "logits",
+ "embeddings"
+ ],
+ "static_audio_sequence_length": 100000,
+ "static_batch_size": 1,
+ "task": "audio-xvector",
+ "tensor_parallel_size": 1
+ },
+ "num_adapter_layers": 1,
+ "num_attention_heads": 2,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 2,
+ "num_conv_pos_embeddings": 16,
+ "num_feat_extract_layers": 3,
+ "num_hidden_layers": 4,
+ "num_negatives": 100,
+ "output_hidden_size": 16,
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2
+ ],
+ "tdnn_dim": [
+ 32,
+ 32
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3
+ ],
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32,
+ "xvector_output_dim": 32
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/llama/unsloth/Llama-3.2-1B-Instruct/4e2587deae1fd9fd0bc7.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/llama/unsloth/Llama-3.2-1B-Instruct/4e2587deae1fd9fd0bc7.json
new file mode 100644
index 0000000000000000000000000000000000000000..72384e6559d3c0b389412b13b7e66f3be4c65b6c
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/llama/unsloth/Llama-3.2-1B-Instruct/4e2587deae1fd9fd0bc7.json
@@ -0,0 +1,63 @@
+{
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 64,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2.dev0",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 16,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "unsloth_fixed": true,
+ "use_cache": true,
+ "vocab_size": 128256
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen2/Qwen/Qwen2.5-0.5B/03d96beb4e117fcc86da.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen2/Qwen/Qwen2.5-0.5B/03d96beb4e117fcc86da.json
new file mode 100644
index 0000000000000000000000000000000000000000..d4fc505f223760f8c6ca6832d99966ff697ada1f
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen2/Qwen/Qwen2.5-0.5B/03d96beb4e117fcc86da.json
@@ -0,0 +1,82 @@
+{
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "Qwen/Qwen2.5-0.5B",
+ "_task": "text-generation",
+ "architectures": [
+ "Qwen2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "hidden_act": "silu",
+ "hidden_size": 896,
+ "initializer_range": 0.02,
+ "intermediate_size": 4864,
+ "layer_types": [
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 32768,
+ "max_window_layers": 24,
+ "model_type": "qwen2",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": false,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": false,
+ "optimum_neuron_version": "0.4.2.dev0",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 14,
+ "num_hidden_layers": 24,
+ "num_key_value_heads": 2,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000.0,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "use_mrope": false,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/f2e7995fb50bc34695b8.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/f2e7995fb50bc34695b8.json
new file mode 100644
index 0000000000000000000000000000000000000000..ad2defd81d16cc867c78e0584cf333891a7d6865
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/f2e7995fb50bc34695b8.json
@@ -0,0 +1,87 @@
+{
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+ "_task": "text-generation",
+ "architectures": [
+ "Qwen3ForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 1024,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_types": [
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 32768,
+ "max_window_layers": 28,
+ "model_type": "qwen3",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 1,
+ "max_batch_size": 1,
+ "max_context_length": 1024,
+ "max_topk": 256,
+ "n_active_tokens": 1024,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": false,
+ "optimum_neuron_version": "0.4.2.dev0",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 1024,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 1
+ },
+ "num_attention_heads": 16,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "use_sliding_window": false,
+ "vocab_size": 151669
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/f6847065b14c4394e8d2.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/f6847065b14c4394e8d2.json
new file mode 100644
index 0000000000000000000000000000000000000000..884880b37fb9c2f595feeafd021510e559f84301
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/f6847065b14c4394e8d2.json
@@ -0,0 +1,87 @@
+{
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+ "_task": "text-generation",
+ "architectures": [
+ "Qwen3ForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 1024,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_types": [
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 32768,
+ "max_window_layers": 28,
+ "model_type": "qwen3",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 24,
+ "max_batch_size": 1,
+ "max_context_length": 1024,
+ "max_topk": 256,
+ "n_active_tokens": 1024,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2.dev0",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 1024,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 24
+ },
+ "num_attention_heads": 16,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "use_sliding_window": false,
+ "vocab_size": 151669
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/63a68fd36a9fc7c4bc7f.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/63a68fd36a9fc7c4bc7f.json
new file mode 100644
index 0000000000000000000000000000000000000000..a404f5f0f1631755e46831eedf040cc21d7a6e4b
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/63a68fd36a9fc7c4bc7f.json
@@ -0,0 +1,62 @@
+{
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 32,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+ "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 32,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "vocab_size": 128256
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/7fd48e77448b0182b423.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/7fd48e77448b0182b423.json
new file mode 100644
index 0000000000000000000000000000000000000000..2f6e0cced16c6b909d5ce58ed1deb28418202e6f
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/7fd48e77448b0182b423.json
@@ -0,0 +1,62 @@
+{
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 8,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+ "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 8,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "vocab_size": 128256
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/llama/unsloth/Llama-3.2-1B-Instruct/7d08bf5fb4f9a14c4981.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/llama/unsloth/Llama-3.2-1B-Instruct/7d08bf5fb4f9a14c4981.json
new file mode 100644
index 0000000000000000000000000000000000000000..95326bece75a383b2cf57edf77c2c9f62af56463
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/llama/unsloth/Llama-3.2-1B-Instruct/7d08bf5fb4f9a14c4981.json
@@ -0,0 +1,63 @@
+{
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 64,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 16,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "unsloth_fixed": true,
+ "use_cache": true,
+ "vocab_size": 128256
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/qwen2/Qwen/Qwen2.5-0.5B/262017b899fd3b5b56c8.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/qwen2/Qwen/Qwen2.5-0.5B/262017b899fd3b5b56c8.json
new file mode 100644
index 0000000000000000000000000000000000000000..52ee2c2f6ac5e5a98a6ec4f92c087a20ec85410e
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/qwen2/Qwen/Qwen2.5-0.5B/262017b899fd3b5b56c8.json
@@ -0,0 +1,82 @@
+{
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "Qwen/Qwen2.5-0.5B",
+ "_task": "text-generation",
+ "architectures": [
+ "Qwen2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "hidden_act": "silu",
+ "hidden_size": 896,
+ "initializer_range": 0.02,
+ "intermediate_size": 4864,
+ "layer_types": [
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 32768,
+ "max_window_layers": 24,
+ "model_type": "qwen2",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": false,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": false,
+ "optimum_neuron_version": "0.4.2.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 14,
+ "num_hidden_layers": 24,
+ "num_key_value_heads": 2,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000.0,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "use_mrope": false,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0f9c2bceea0eb4b649eb6301bae482ddfe1bd31e
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a6ce6eb6a6e9092fb242da88b081169884a4e9ffb86c8f61e48dde2a9df68e9
+size 658089
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2310b759256ba540b92b0c94fc13fe2d4b6a504c
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4b22075302d3aff9fbc0ac3d3830740c1622603352852610cba25e704ec601b
+size 717824
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/wrapped_neff.hlo
new file mode 100644
index 0000000000000000000000000000000000000000..55482a97de0067070ffeec5736527d278c1cfc2b
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0586f99d912ae4eebd66+a02c3a36/wrapped_neff.hlo
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79fd08910a986ab1b585a9f2e6fbe14c19a3321014652f25f8dd1d1274ebf7ee
+size 853136
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb
index b6d58bfddea9fff9d33e332a71aa7e3df064e510..b7ad9109de8912d9f01033608b6b144af2d6f83c 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:ceb0a23b716925f843a16e15f9f0385f067950c8f9f55cd7b3db583ba03b1a7e
+oid sha256:e836c6bbdddd0f87e50203756e12757180235ce3d83d59d598f073d7c76e335b
size 84807
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff
index 65c3e0dc513174a5405638385f4e4fd1b6fb4ca4..c5c39999d71effb03638361980fd6d1c2b85f6d7 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:0f26bc2e40e6e21bffa40348dd28785a0c5ba6ae26126553c85435559849af5d
+oid sha256:b1fb27b3c750546f4a06036bc56bdc41a2a97288350a5a0427e04855594a6cb1
size 646144
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff
index 46cabc60bcc88fb29895bd63d7b5b98496c9998c..47b2d23efdec25948d751e5baad723efe5b9a39e 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff
index 998f9c266292836a3fb9e502cc4a677d6f661a3c..510b036b61c7b9600ae9939ba04efa41aacc911a 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff
index 9a78649acdf9c9ff01e2d0cfc108b5d53729db4b..b3e975f7f00da9c640759fdf60fff34aebae9872 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff
index 2742ed78f4cb2ae4b1b8c9082913b566abb47ac4..72aeee836a900438c9b69b214d32510e64830888 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff
index bf545a021307d38d5b947059fa4e1c5954b69000..ca9a7926d1dd5da1979461b2e1b7a4d9ba4e5b92 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff
index 3e48b42c11dffc635c15f0d7d6087506de9e6782..f8850142d42d734eef03aeb74aeb52b52f6ab92b 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12509236927544110827+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12509236927544110827+e30acd3a/model.neff
index ec0d32f6ff51f2f95eea6438e3325377af2935c8..8620ffb79cc29393b853a86d962d0523a1f842c9 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12509236927544110827+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12509236927544110827+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12650363609878702055+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12650363609878702055+e30acd3a/model.neff
index ce8d4604cfb0118518c99966b9cdd50d579e0bfd..48ef8cf6170dadd8b37cc9d86f2a440795d9b71f 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12650363609878702055+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12650363609878702055+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12666804432653521811+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12666804432653521811+e30acd3a/model.neff
index 88d77819c765a8a16e0f8a732443cc0c3575f3bc..cda36a864cbdb9a858f02fdb99b859ee90f0cb7e 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12666804432653521811+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12666804432653521811+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1277765475941548362+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1277765475941548362+e30acd3a/model.neff
index 2fc95f47fcc3fc216d9cc675fd464c3b1f116a1b..c109d2153de1428bc7f6b831cc32d75112addc2c 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1277765475941548362+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1277765475941548362+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12795949123662846630+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12795949123662846630+e30acd3a/model.neff
index ee41a4cb2dcc67f093ec3cffbed373a0e3b4c755..51292e8a7b566810c329d2359183c57f81b6e51a 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12795949123662846630+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12795949123662846630+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13092192882592555392+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13092192882592555392+e30acd3a/model.neff
index 0447e7de2e631ee668a68485e467ea384ba0626b..6f7ec76ca6ad471ba4d0af06ba86aceeaecd43ae 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13092192882592555392+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13092192882592555392+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13129006844218143067+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13129006844218143067+e30acd3a/model.neff
index d75abe563ff530c7c4d2d078b48a21cca1249571..51c937be941b45c91057d69a89965edc798f8ba3 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13129006844218143067+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13129006844218143067+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13163168066471565112+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13163168066471565112+e30acd3a/model.neff
index 46ad11cae7db15540bb26566b22b49870f993347..790ab1b929b30a0667d7832261dd3345be6a21c8 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13163168066471565112+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13163168066471565112+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13385766575326555678+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13385766575326555678+e30acd3a/model.neff
index 9c8d26380f1557da499b9a6fb92036cb349d083f..b52111b85f89973185feb66cce513fdef076606b 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13385766575326555678+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13385766575326555678+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13521002922414225272+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13521002922414225272+e30acd3a/model.neff
index d43889ef27931233d2093b47978a4c0cca4566a2..d6152c240921640a8e222af704527111bd5e10c1 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13521002922414225272+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13521002922414225272+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13673338043232097095+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13673338043232097095+e30acd3a/model.neff
index 71f70ba962ce89e86ee226533c52e94b88fd44e2..b438ec3c467e19398fdde99785dca45028455950 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13673338043232097095+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13673338043232097095+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14024125047097359821+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14024125047097359821+e30acd3a/model.neff
index bce5ab16e6121f0b237cff76d543046e6a7a4188..0198c4307cd10fd94b8ed28281a28853dc79ae1c 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14024125047097359821+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14024125047097359821+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..3d86e59c12fa784c9f070b9ad15ebbdd1acc9d31
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3468b6e74d336f18fe28b6fe457a52cef249b9630225261813c967bf71d19e44
+size 586051
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..a0ba3432717c31f6628d207b81eae4bfec2a1877
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14271466b8fcc2e97e04+24129607/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b54408c237472906fdcf6d1aa87649570b887f3690627b94b699d9fc666dea5b
+size 7097344
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14406838977173684020+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14406838977173684020+e30acd3a/model.neff
index 20439e380c2d1cdedef21ffb85c89b97b53cdc40..1a2a776e730c32dfdbd090b306aac61294fd7799 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14406838977173684020+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14406838977173684020+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14488951057292576015+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14488951057292576015+e30acd3a/model.neff
index 6dd7f26f44eadb4a7719066077d9ee63699b9af6..9fae89a6d64d310fa70c188e489c8b519e665f7a 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14488951057292576015+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14488951057292576015+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14607415948795306857+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14607415948795306857+e30acd3a/model.neff
index d9bf1ee2452e135923c9ba0456a953fa18e90003..1a51175e4161b8282f74f48175acafdb7bea24c1 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14607415948795306857+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14607415948795306857+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15104978417860996248+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15104978417860996248+e30acd3a/model.neff
index 7089f292a699cf97753d73a29b2df453a6d1c2bf..de1f53afa2f144d74ea9a59819187f652b33d515 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15104978417860996248+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15104978417860996248+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15195479995167874327+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15195479995167874327+e30acd3a/model.neff
index 3fad515f87035553b5027ae19d83b7284bec8e82..094e1f28b26e3f3ae177a376d08aeaa9e7602363 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15195479995167874327+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15195479995167874327+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15226158922329678840+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15226158922329678840+e30acd3a/model.neff
index 7f81b0939673d8368d6361f4d0072ffe9c354713..ac9b8087d048fd5acf839338584db9baa640814c 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15226158922329678840+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15226158922329678840+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15445992300537187360+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15445992300537187360+e30acd3a/model.neff
index 9070838855bea33c26e9ea35a581c4be1acabc5b..166373cf7e47e2ae459850e13214d0efe50165c7 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15445992300537187360+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15445992300537187360+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15549583432468528942+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15549583432468528942+e30acd3a/model.neff
index f6501b2548ca486bf23eb83384ca6ca7ab6411f2..9a768af4c1b38353df99768fd4e7c1f136652e0a 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15549583432468528942+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15549583432468528942+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15883166014121986340+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15883166014121986340+e30acd3a/model.neff
index 3ba7cf11c291130ab3e3f3a4c97691ae0d0f05a8..0713b1ee9d7c34a3c2317ca4e0553cb0891882d6 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15883166014121986340+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15883166014121986340+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15974718484747567133+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15974718484747567133+e30acd3a/model.neff
index 2fe2c3226834ba44dfd3f0b9334fbf63666c9872..eddb872ccb08fa1e769242eeef634f302ab6b319 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15974718484747567133+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15974718484747567133+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16063773584643651549+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16063773584643651549+e30acd3a/model.neff
index 2ebfee66f4d490a1da2124c5496b960215675229..29aafc038ba470a56ca4efe6becd7b3abd08749f 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16063773584643651549+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16063773584643651549+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16540141349946602462+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16540141349946602462+e30acd3a/model.neff
index 832cd25449e8338f9c4be294eed390f2c616e0d7..9ca542af2d7a0c2104a343e262a2b18e75b3b054 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16540141349946602462+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16540141349946602462+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1659424179484095552+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1659424179484095552+e30acd3a/model.neff
index 80c23e82b6a51cdb3797bb237cef3a33593c98a9..09e213a0026edc1b564a1d5ec9e09c31091de85d 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1659424179484095552+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1659424179484095552+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17037369046574255528+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17037369046574255528+e30acd3a/model.neff
index 4b034266cbfc2d64d674e7a6fcd9e2b7cae50452..f23030715dc11edf08210512f85c7c2dd90f775b 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17037369046574255528+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17037369046574255528+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17162153672426857671+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17162153672426857671+e30acd3a/model.neff
index 1537f4617f6154f6af2e859aabe50b1c57bf2b1b..7ac530bed8968e1e1c8db3b81a09a24a69961f8d 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17162153672426857671+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17162153672426857671+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17184761711863280677+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17184761711863280677+e30acd3a/model.neff
index 920023191f4ef8ada430481cae37d2447aa8fa7e..a76ddc280c313d4b71dbd6cd240e891d351b4a93 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17184761711863280677+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17184761711863280677+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17685401492131160329+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17685401492131160329+e30acd3a/model.neff
index d3af2cf04b8098621b5737dca56d4f294effad4d..774dce31159675a2e679614c1d26b8b2093fca5d 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17685401492131160329+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17685401492131160329+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17713911408407405055+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17713911408407405055+e30acd3a/model.neff
index ebf3945fe21c45aa7ac49351d4f7b1c9488bc04a..b7d58cf24f4baa0d2aaa8e18ccbbf325b9e85d69 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17713911408407405055+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17713911408407405055+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17795910220177952420+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17795910220177952420+e30acd3a/model.neff
index d5f4da09dc6fa0810e2417e69b525a78251a402a..21e3aa8467421e2c3a392d8dfb6c4a2354396f16 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17795910220177952420+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17795910220177952420+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1837940185986854500+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1837940185986854500+e30acd3a/model.neff
index 1f9793a24c62e897a4481784500e50eb69107210..3294c17edac1668b75a8614d58d41ebf59e14d04 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1837940185986854500+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1837940185986854500+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1870165519051566644+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1870165519051566644+e30acd3a/model.neff
index 1e26638767893f2220cdf7e666074ff163b798d7..d92d4f35f5503b2792418d1ce2e91b3d19459482 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1870165519051566644+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1870165519051566644+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2240472231266337587+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2240472231266337587+e30acd3a/model.neff
index c63b44bbc3af7dc5d436059731ec87a161648748..431a76fa0e10f21e1269dc5164532f4d9a940a89 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2240472231266337587+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2240472231266337587+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2241641840113569106+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2241641840113569106+e30acd3a/model.neff
index 920f3f4f4787295595b25525271d36e248d9aa69..27414ee23ae5fb2f308b2529629617ae0de511e8 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2241641840113569106+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2241641840113569106+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2447413629898598397+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2447413629898598397+e30acd3a/model.neff
index 4ba37c21c3c45e52ebdfe324fcc4c989ea642ebf..66568301c42b514ad3f23ace555609d186b2042d 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2447413629898598397+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2447413629898598397+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2477619772153140591+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2477619772153140591+e30acd3a/model.neff
index 685e43a3afb6be45d1a84b0f8cce25535103ffcf..36531fe9eabde65764a31413b863b1a7fb823e8f 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2477619772153140591+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2477619772153140591+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2840230091891637644+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2840230091891637644+e30acd3a/model.neff
index c90b163e96929af971063f8499083f9701a5e71f..dfebfd01b9bd1d28626c6cc47517d7bf4ed3dec3 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2840230091891637644+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2840230091891637644+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3006935121772265506+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3006935121772265506+e30acd3a/model.neff
index 34fc858b2db7224efe645a33b45a413ea53543e2..92f5531b0dbb9add7069b9ea498099f671541a50 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3006935121772265506+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3006935121772265506+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff
index 6ff7e2c0315aeb33b1e8ee2fe5b817ff3f76d8a4..055a09c9aff230852524e0d9b1989d793ad8e6e5 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:690971e38e78e6e8e2e0b2249904244dbe845f18a1d36cb2b763deac5fe550f3
+oid sha256:f777c8a6c6182a9ee0b20468799c6eaaa8cb553d672d63ca3b99e45cdf486c7a
size 277504
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo
index 0605fedfaecfe63a3d5054643fbef3013bda8c52..a6bfb26b27865438bf59b9aff33040ebf5fb48ee 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:937aba417c7465954a0bf71e8d536e6ce2db41d44b477deb724f8f6154f76e19
+oid sha256:26a3ff0b3bd57e8ea0e9d0c8e01d70430f1d70170e0848f191a9deafb049c3ca
size 285854
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3097762131279755404+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3097762131279755404+e30acd3a/model.neff
index fb7275408c4a0ec32ddc21e52a6a36e87b7ea4f5..1df28e30dfc9c639daec312473f03de3d7d87ecf 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3097762131279755404+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3097762131279755404+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff
index 394c147992a80dc61c65cfb271c654d1852c968c..a212832d0e2624c58f8ee53bd0630893ed673717 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:adc574992e72adfe26c916de4e344ec745c5f0367620c1183437c3127f9fef15
+oid sha256:6cfc7cbbff8d344e9bc2d0fd5813934fc2a0791f3f5d38fa7fa5ded8233e875a
size 246784
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo
index c81f0318b71b9bbc335a042b83a9e87ff72482ad..f4027d975926441e69957d9e9b4c6ddcbe022c8d 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:6e3995a13290761f8f6f89854871f6a5da48002e637efad7b67cec72e826cc1e
+oid sha256:4111305c441220fb5cd9f833015c8d7e05357a07dd5304f5836ce7154c885835
size 255104
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3742470769527135970+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3742470769527135970+e30acd3a/model.neff
index 515638783f74a85409c480fa99aaded2b1b81c47..ebb1422ed9574ed3c3dd6210b8ac46dada7b541e 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3742470769527135970+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3742470769527135970+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f38b46d229a62a4e95a2e10819b709bd04836382
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7cc4e882b5787914fc23d4ecd96ce3c7f39fff50d843b803f6b32e590745f13
+size 587688
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..c61a48d1e3ec8428ffb807de7379ef621e7f135a
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01f77b1f18301937e37a82f6538983b31edc87174150b29505181be7202c111e
+size 666624
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/wrapped_neff.hlo
new file mode 100644
index 0000000000000000000000000000000000000000..db5f3adfbcab932f83aa88ac6bb5ab5ae469c5e8
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_37a815b3d3073ff820db+a02c3a36/wrapped_neff.hlo
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd348c349ee6e7691ebe18d673c0a9b84d495723a379316c5d1042b0cdd2b17e
+size 801108
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3802233204730345176+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3802233204730345176+e30acd3a/model.neff
index 77db2ca3c0405ddde7e15785f24107027e10b75e..49ddc879c04dbabda5ac46b6e4a5a5facb20fde0 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3802233204730345176+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3802233204730345176+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3918194918412354509+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3918194918412354509+e30acd3a/model.neff
index 60bc91e19d24ed4c6c793e5c36141552d4c5369e..80d5ce188a8cbcf9148bb9796eb62740d7133a63 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3918194918412354509+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3918194918412354509+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff
index 71a3bdfe44c2c40ddc76a23f4e943654c9c4ca28..3ce95586e15141bda2706ab5e8d82a867958c972 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:40684a5208697e733fd92b531e426c9a49e2e2f3dce8fdc81afe434818f265f2
+oid sha256:42b6f498c3a5ca74904642206c52be1b2b15f548344985f9be20d2ccf6e772c2
size 216064
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo
index e878da9fb5ba13604b80452f8837abbf73e8bde8..0fdb46964717354f28033fb9b0c362d333b255b4 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:e490cb2df34ae3582e58811ed1a9ea678ca780e9525b51113490839860539904
+oid sha256:c76cfeb5ab013b9aa1ff4410f2bc90a7559e7e46e7dfc93a562a408e90346603
size 224412
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/config.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e29eb0931ffbfeda6a69b20cbf8b9971529297f2
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/config.json
@@ -0,0 +1,116 @@
+{
+ "activation_dropout": 0.1,
+ "adapter_attn_dim": null,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2Model"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 256,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": false,
+ "conv_dim": [
+ 32,
+ 32,
+ 32
+ ],
+ "conv_kernel": [
+ 8,
+ 8,
+ 8
+ ],
+ "conv_stride": [
+ 4,
+ 4,
+ 4
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": false,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.0,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.1,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 16,
+ "initializer_range": 0.02,
+ "intermediate_size": 20,
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.1,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 2,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.5,
+ "model_type": "wav2vec2",
+ "neuron": {
+ "auto_cast": null,
+ "auto_cast_type": null,
+ "compiler_type": "neuronx-cc",
+ "compiler_version": "2.21.18209.0+043b1bf7",
+ "cpu_backend": false,
+ "disable_fallback": false,
+ "disable_fast_relayout": false,
+ "dynamic_batch_size": true,
+ "float_dtype": "fp32",
+ "inline_weights_to_neff": true,
+ "input_names": [
+ "input_values"
+ ],
+ "int_dtype": "int64",
+ "model_type": "wav2vec2",
+ "optlevel": "2",
+ "output_attentions": false,
+ "output_hidden_states": false,
+ "output_names": [
+ "logits",
+ "embeddings"
+ ],
+ "static_audio_sequence_length": 100000,
+ "static_batch_size": 1,
+ "task": "audio-xvector",
+ "tensor_parallel_size": 1
+ },
+ "num_adapter_layers": 1,
+ "num_attention_heads": 2,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 2,
+ "num_conv_pos_embeddings": 16,
+ "num_feat_extract_layers": 3,
+ "num_hidden_layers": 4,
+ "num_negatives": 100,
+ "output_hidden_size": 16,
+ "pad_token_id": 0,
+ "proj_codevector_dim": 256,
+ "tdnn_dilation": [
+ 1,
+ 2
+ ],
+ "tdnn_dim": [
+ 32,
+ 32
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3
+ ],
+ "torch_dtype": "float32",
+ "torchscript": true,
+ "transformers_version": "4.55.4",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32,
+ "xvector_output_dim": 32
+}
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/model.neuron b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/model.neuron
new file mode 100644
index 0000000000000000000000000000000000000000..d0659281f2bdcbe51191dff675fd3fd374764d69
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/model.neuron
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b758ca2b98ba9695fe2510a9b21e2924f0f0045f9bbf3c97573f9199f26500f
+size 836087
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/preprocessor_config.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a0b7227fc1d916e469b14f6c154ad6dfea1e6891
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/preprocessor_config.json
@@ -0,0 +1,9 @@
+{
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": false,
+ "sampling_rate": 16000
+}
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/special_tokens_map.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..fdafe480f024ff444c7492147536765ce5d55a2d
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/special_tokens_map.json
@@ -0,0 +1,6 @@
+{
+ "bos_token": "",
+ "eos_token": "",
+ "pad_token": "",
+ "unk_token": ""
+}
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/tokenizer_config.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f27aad8f8d4422d8078147872a0b0071498cc5e3
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/tokenizer_config.json
@@ -0,0 +1,51 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": false
+ },
+ "1": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": false
+ },
+ "2": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": false
+ },
+ "3": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "do_lower_case": false,
+ "do_normalize": true,
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "model_max_length": 9223372036854775807,
+ "pad_token": "",
+ "processor_class": "Wav2Vec2Processor",
+ "replace_word_delimiter_char": " ",
+ "return_attention_mask": false,
+ "target_lang": null,
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
+ "unk_token": "",
+ "word_delimiter_token": "|"
+}
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/vocab.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/vocab.json
new file mode 100644
index 0000000000000000000000000000000000000000..7efc55a6619a53189e1ca2b5bdcca400a2d4f3e6
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3fb52d732cf73210c52f/vocab.json
@@ -0,0 +1,34 @@
+{
+ "'": 27,
+ "": 2,
+ "": 0,
+ "": 1,
+ "": 3,
+ "A": 7,
+ "B": 24,
+ "C": 19,
+ "D": 14,
+ "E": 5,
+ "F": 20,
+ "G": 21,
+ "H": 11,
+ "I": 10,
+ "J": 29,
+ "K": 26,
+ "L": 15,
+ "M": 17,
+ "N": 9,
+ "O": 8,
+ "P": 23,
+ "Q": 30,
+ "R": 13,
+ "S": 12,
+ "T": 6,
+ "U": 16,
+ "V": 25,
+ "W": 18,
+ "X": 28,
+ "Y": 22,
+ "Z": 31,
+ "|": 4
+}
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4216092561315976987+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4216092561315976987+e30acd3a/model.neff
index 39f17b6107e96fe8531a2e0d7ce60f825082990b..4a85bfcf7865f2b7f260a50b6a609e34620cacdb 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4216092561315976987+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4216092561315976987+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4609340858424122400+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4609340858424122400+e30acd3a/model.neff
index efad80c3e364d7c9ef830814f6b506c5cd89c8a3..1e93c59bb3313fe7c7511f6ca69b52f564b3f5de 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4609340858424122400+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4609340858424122400+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4692571821501481255+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4692571821501481255+e30acd3a/model.neff
index 70d6eba60130a5fde6d60245c4f38c652e6ab8f4..701b62c4666b41195f57ad786b8079c7647792d1 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4692571821501481255+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4692571821501481255+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4729947242617427400+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4729947242617427400+e30acd3a/model.neff
index 569e8c3bb9f118d41deecc255b2239283e6247f8..2c1797351aa02958edfdf014d0951e6a53954729 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4729947242617427400+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4729947242617427400+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4748150422668476963+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4748150422668476963+e30acd3a/model.neff
index a5fa8ce5d6e7ab2663115779c4ee97950bafb963..201066e03e9d5ea24f043f1a65a96c07c23eb489 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4748150422668476963+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4748150422668476963+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4982309848892198153+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4982309848892198153+e30acd3a/model.neff
index e5ec1ceeeacb93b80e4fb279e672122f9373dc07..d10e4751cd2ca9ba3370519ba587e03c71893d9f 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4982309848892198153+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4982309848892198153+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5056926643664195969+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5056926643664195969+e30acd3a/model.neff
index 784cc8d976286a993854c621493454c4b4725128..af824e45e759c2a6ce926ea3415c2d13a61d7a6e 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5056926643664195969+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5056926643664195969+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5060945789978167091+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5060945789978167091+e30acd3a/model.neff
index 4b9b594117b141cf4562b41cffb26a6452d9cacc..2bc4c77d156930ece350bfc902cd2b3c8e594a9a 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5060945789978167091+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5060945789978167091+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5068307057019708073+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5068307057019708073+e30acd3a/model.neff
index 58320f9ef42a942a56691a2e0fb27648b3755946..b8012feff4c70e7429365b109f89027be05fe143 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5068307057019708073+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5068307057019708073+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5221425564004302780+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5221425564004302780+e30acd3a/model.neff
index bd77c56abdc819e303d1d205e6954e190034ab70..1faa18fd91edccfa86990030f9a938a48a39219b 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5221425564004302780+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5221425564004302780+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5301538834954885513+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5301538834954885513+e30acd3a/model.neff
index 99f34c7e1d224d0590c83a7b8a0175c3fdfdebbf..c18f641061aadb246b042b657b1bf9a1a4a18aa5 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5301538834954885513+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5301538834954885513+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5323357013706876100+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5323357013706876100+e30acd3a/model.neff
index ce525309553c0945f60bfc1f12dee7357f2b1269..08309d31c1537c92f7de8484f165c027afc25778 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5323357013706876100+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5323357013706876100+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5346694134112720644+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5346694134112720644+e30acd3a/model.neff
index 8cd0ba216875d1100e9d5435cdce8155ef8f1756..c0eb99fd46aca922c3d23c2b3c287c0e23d82d3e 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5346694134112720644+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5346694134112720644+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578110665359387607+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578110665359387607+e30acd3a/model.neff
index c82e8efe03af5de748e69cd51dc01f1596f5c640..56698eef51b562845b7c4b36576355ec24e09fcb 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578110665359387607+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578110665359387607+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff
index 2916b3c791c6e86e6519d9605dcbc80ec5f2f009..d0c6495c22148345331fa85585b24a9c3b0007fc 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:c0d1a66ec4620a7bbd95ddcd3f0b8563e7b9fd48c9167bbb83e09de5ea8f2045
+oid sha256:01a4d18eda6164a6b50527f6a7180ae45148b066dd6d0e033f234dde926d5107
size 369664
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo
index 00f1bf5626daa38aadee5cdaaeaf15232e382a9c..10b11a088c299a7218ccf835f6cd8752bafc259f 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:6d780e7908fb2043598326a014149c1adc7cf79ede14eaac7e57a0efd5aaaf1f
+oid sha256:e91b3548b13cc03a8e67cd2c8c4e3376fa4e77938edf443c5d0e1063602633e6
size 379362
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb
index 82b6d63ef07e8b99f982e5b2a216f4fb928ab10b..4fadaefc88575cf4d43531bb09c28f4cc8c7528e 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:4b57a81178b87e2fa8d72c53f158790060e6f323a7ea625353c0c2ecec75b33d
+oid sha256:c8513c0189cdb83b5fbc46c86bc3fd8861c79ec2fe2618b54dcf0b84b52e1eac
size 694128
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff
index 076477cfd754342b7fb936e67752c55b6434393d..eda7469406c9d53e35fac637a9a4067ef68c5d34 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:b94b84c6a5ac7142334a119a16754827bff07c49ce7dd2d1b9f71d6c2d29a755
+oid sha256:ba9b644f373493b3733e39fc800d88c37c5f0f52508377ff9ac12bec90f2b1b8
size 625664
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6208374896869439318+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6208374896869439318+e30acd3a/model.neff
index 7f760b9adfd50e90a90cacd1278622ae47fb4fd3..6ea53da95e1416b2ff03d9c0d32796a618ddda41 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6208374896869439318+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6208374896869439318+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6305938804443382221+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6305938804443382221+e30acd3a/model.neff
index 3cf0f5b28104289aeb13cf0eee4b1336f4a02808..4c3382f2cfaa645ae104d42958fda1a56a305a10 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6305938804443382221+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6305938804443382221+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6462012722744130561+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6462012722744130561+e30acd3a/model.neff
index 4c1d1e5c6f828f42ffef185475b8adc3c9aa5738..f3dfea01a40834be6354033fb1dbfb93fb2559ef 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6462012722744130561+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6462012722744130561+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6465053070255633885+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6465053070255633885+e30acd3a/model.neff
index a07e5af477a6ea33164e9ae2fde2460984ea597f..2ceca84e934b6cca16c827e054d804aac2e0bf12 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6465053070255633885+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6465053070255633885+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6601989946514187606+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6601989946514187606+e30acd3a/model.neff
index 492d779de8a765c9844a5cdd8dc6246abea10acb..9ebc81577b01eeb00bee14030e799d4f51b604c1 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6601989946514187606+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6601989946514187606+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_689110558945142051+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_689110558945142051+e30acd3a/model.neff
index 553da315b6da0a9d3cddd5d23e1ed2f1e522dae0..42a3be7f2772ae82deaa8dc18656112d85901729 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_689110558945142051+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_689110558945142051+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6954418380069475056+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6954418380069475056+e30acd3a/model.neff
index 98b108794ffa2416c1ad516202003f75d809b4b3..49ea00276e8f8efa4eb05461db51beb12d302b18 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6954418380069475056+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6954418380069475056+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7071522469786365265+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7071522469786365265+e30acd3a/model.neff
index ddb275da0de02437a5a9dee23d4bd6be5635dbef..9a9ac8f17d803df3666923b833a7193b48e868a1 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7071522469786365265+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7071522469786365265+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7430361747835832819+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7430361747835832819+e30acd3a/model.neff
index 6b17afa5338aede0d237ac3d08d0f78a5b36af2d..62a5f17e0d249c1b6280fac6f91da5dae89808cf 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7430361747835832819+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7430361747835832819+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_748714541699003438+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_748714541699003438+e30acd3a/model.neff
index 7a0d26688fd0d1cbe97cb9eeaa785cf0c2b9585a..9a5f278abb2521091cfea6c31fd6839a952beacd 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_748714541699003438+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_748714541699003438+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7760252933458031364+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7760252933458031364+e30acd3a/model.neff
index 6bedb3b4129c9fe37c68e549bc2aef6b11992653..b528fa315a1498ffc7bb030bea935676e23c2a78 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7760252933458031364+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7760252933458031364+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_780259796876411187+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_780259796876411187+e30acd3a/model.neff
index 794b5cc7d81170da171305f9d84f2fd54b687c4a..bcb72e395bfb8acd812c623f51922aa304ac512e 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_780259796876411187+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_780259796876411187+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_788940165194100575+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_788940165194100575+e30acd3a/model.neff
index fbb4c720ee45a5e9a59c75ff5beb279bd52df052..884d074619cba67f9aac6218101c32acd01440fa 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_788940165194100575+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_788940165194100575+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093733071045345337+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093733071045345337+e30acd3a/model.neff
index dece9ae444bfe032bc5be1f6f5664c53834f231f..7479ad52d4894ce83aad89e607d56e1f218b01f4 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093733071045345337+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093733071045345337+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8432304590411733968+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8432304590411733968+e30acd3a/model.neff
index 16067964bb8438f07bc88cea6d27a41cd2cd1c13..00630a53a0ff69917dd04b9e2be2a90ed7e9db82 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8432304590411733968+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8432304590411733968+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8468241434736974290+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8468241434736974290+e30acd3a/model.neff
index a302f4527187b1b2e1d41b9216caa54496f8cc87..58f2c2b99f5460179489d270033aaef7712c45d2 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8468241434736974290+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8468241434736974290+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8476840015321783067+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8476840015321783067+e30acd3a/model.neff
index b16858d0bdcb662c29c6a7b3587ef0b4e1f1ab60..4792cd97389e6ebc1b4627380df78cbb24aea175 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8476840015321783067+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8476840015321783067+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8657275884604457834+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8657275884604457834+e30acd3a/model.neff
index e2a4ebebe190a1903230ed3f61779941ea5ca981..54a7b31c92e3848fb3ec88ad07db5ca43f7c2470 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8657275884604457834+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8657275884604457834+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_879201820668420060+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_879201820668420060+e30acd3a/model.neff
index f91c8c2e141fbefdcef865fbb599afeca579f700..72f0ee130b91aa64ddaa9d322641190f7f2e52c0 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_879201820668420060+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_879201820668420060+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8874447663297084929+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8874447663297084929+e30acd3a/model.neff
index ed0782524777a870d0b4612f1af801febc4eea84..a1c90fa3fa5609d1ba0e905077cb5daf34fb033b 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8874447663297084929+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8874447663297084929+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9359742670556022940+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9359742670556022940+e30acd3a/model.neff
index 2b572fd0f6fdc4b07863048a9b37ce62d94f455a..eaba36afa56068de6191c70ce71fb30f56ca34dc 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9359742670556022940+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9359742670556022940+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9360214757141243910+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9360214757141243910+e30acd3a/model.neff
index 0909307bc2ae71e642a6ebd30109ae3391538d3c..227497f989f0192b6774488751868a0e9b0d68f8 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9360214757141243910+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9360214757141243910+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9551663534243818596+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9551663534243818596+e30acd3a/model.neff
index 8592b740992116c8d08e37f753a8c8614bd01334..38d3615eb241b8eee11be2d0b4e5ca668016a8b0 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9551663534243818596+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9551663534243818596+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b2c99d3224599ad5521287e53ddd4c44885fca13
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6da7ab2a9b9fff24e2250f22828de1bdec45fd422fe7330555a715d005fbc797
+size 558213
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..415daab1fa4d4c170742343215d770230383b51e
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_962a482f90524ff403f5+24129607/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10ebcdc6c4d8503d9c026d7a784bd36cad0405d7998adb9366a82a4a6355d5e7
+size 1281024
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9721314421976720364+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9721314421976720364+e30acd3a/model.neff
index def40964839629aa91b0d40d1b97857bca41cedf..0c1f9637bbf05b14beee2937dc718ec872ac6bab 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9721314421976720364+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9721314421976720364+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9884039268981168463+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9884039268981168463+e30acd3a/model.neff
index ee19a7759dcd1c655c2231ff47a793b5437bcc9f..f8f23b4c0f906be721df644f4394ebf20a1c3084 100644
Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9884039268981168463+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9884039268981168463+e30acd3a/model.neff differ
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e256ee770dbd1dad86c99d87e0b3150a4831ab47
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:770f59a15ab233bea70cb16360c196b23b356f67051422593116053095e3db38
+size 739558
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..c91a665bec69f0ba4f66e3255658f1b383dcd120
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58017c0e14cfb2aa2617054665041a111f19a8658e3856ce7f80963501abfa68
+size 26133504
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.hlo_module.pb
index acf3f511b6ffaff7078cc0c892c0083f21e7e91b..ee30e44a9b8d8c5eadef8b67957abd276309a871 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.hlo_module.pb
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.hlo_module.pb
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:8a22b54d714b2e6fd73a96a878615820fd25ebb97a5970c04db7084ddf50b4b0
+oid sha256:0d5a2ac00c6f53080df7f7af438e48d65e8e12b89afdd1d51fd3c3db963464eb
size 83504
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff
index b1e1ed26daed4964edcf7abbaceb23ed14c61d94..10563e540e6bdec4f9d6019bfa852a3c18fdf814 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:a60ade2cbe5d1ecb7cf752a53cdb2fb8394d10759cceaba09809d3851485a793
+oid sha256:26172a2005a53a0201274e591bc497b0902fda99f9126cd93f3cb9878b6f37b7
size 328704
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f8a17cd08c6a504ae7d0fd64b6823290f002a0ba
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d27d563a4e3028e4ea49838d9c3b5f1557c20ff9dd0f09d8fc5f9c669102fbe
+size 658192
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..55a1b820949f0454247106c19f6e145a314a48e5
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c19467a48069b84098dabdd6efef18c42a69ff42de300fe4fea16b3184b5f713
+size 1915904
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/wrapped_neff.hlo
new file mode 100644
index 0000000000000000000000000000000000000000..6071c6efaddbafc37c609e6c2f0568cedf65aa06
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_accf1fdc5eb1cee6c292+a02c3a36/wrapped_neff.hlo
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0b37ded4085f2a991c50e1d51b12c9ffe8f3dc973820c11514f7d596e4a80ad
+size 2053551
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff
index b0102cdf1927883fe3234477be913799074c3dbb..8effddb7f3f09c5c9eec4b3938a90895d2a6e190 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:76eeed1f996721edd4d90e040c8ba23e123b944dd2e18633add5e9705083e6d5
+oid sha256:3ba979892866e01a49ebf20be74fac057f26fe51409f7156118b04d400854313
size 277504
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo
index 4b52f9010eea2d4081ad81c5c71223fb8c786cb2..5776555dcd57638f3d4fab53bd51f3c7058ac12b 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:05b92ec0c7f07df4aa9454f74cd4630602acfef75330bdfb6ed20fc99fcc27b9
+oid sha256:763ce00a05b2717b1eec08b394b8d3ac44b0790799261b2577ae047074a8bc31
size 289571
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..7d000ce7901fffa81c3f4f6611dc128472d53349
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a27fc391fb00dcc2ff94503a0e8cdfec537233a2da5f9ef46efd2fa572082849
+size 629764
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..bad8ba280d42c12f34790385c01d727a2c73e170
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b79a786451ff8fc0676d+24129607/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b5d6df30b18936c5c141c3b035aa517a81a413ff9623cf3fde5b6be81572849
+size 1321984
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9c5a330b9f7fdba6bb4c837889e33f6c9e6384d0
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c2920e5fde77a80d2d66c9ba5addf46e53d032f533c7097cba34e31243f84ed
+size 588406
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..06c7bebc528ec9211d77c08dadc77a20c04873ad
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fb43f4ea0d29f851cd0c0d3575ab21545fe6863d03f53111b381301e3eee665
+size 1926144
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/wrapped_neff.hlo
new file mode 100644
index 0000000000000000000000000000000000000000..c28017606b905a30c25d51567e0a3028594905dd
--- /dev/null
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/wrapped_neff.hlo
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cfc00602dde4b251f7ee5e382c599874555cf1857694aab39c3a65c89e133d4
+size 2082478
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.hlo_module.pb
index 736f69c82c1268401ffca3bfbad76251974b5f45..0129d627101123f14a4e4402f1682e549c127d55 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.hlo_module.pb
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.hlo_module.pb
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:f167244b4e5955198c7c0ca61b3c87ab0806098bb2885eefc0395531c74cbda7
+oid sha256:c96a4e8288b63db477161aef20d2dc593901286ea3754d7d31e707fe4224c127
size 97794
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff
index b6a59d8f8fb67662b40b9c852e67e53a30c1c67c..03350e90befb5238ddbac36f604ffdd837d03964 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:902855d7c01e956cebd4ee97d606374819e3d1772628fc92189bd7a1e9c61f78
+oid sha256:252a4ff6650ef0e3e6ac6ec80c05e37f4e7550ffb1fa20a964d313b7f436da89
size 410624
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff
index be005290c62f5dd8c3ac7cd53a782b00f0987b59..f4f520da574809ffd972bfb66bd7b418916c1f91 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:bec5dfc6b85bd28401f2ae9ab0c8ec90e2a88bd02dd3c576d6deba9f2537c5f5
+oid sha256:7378a5ac51d2f7997fb6161efa652d194b4015c55c69a81a545f481f72f45255
size 543744
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo
index 85d9b04936b7744ae5cc0ccb97e8b0596cd86f91..b29d5ec537969633b47b7d7c29fef9c752b83ddb 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:c61a3b4c4dba6212222c9cad3593be877e5466393fd035ad5f8cf131926c2c57
+oid sha256:c411c57f10a33244c645ac53841f60b486fc79608cdc51ffb1d16becb62fd6aa
size 563380
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.hlo_module.pb
index a87347e585e555545a4c675cb4ee611f7a685b0b..0dc3d73d6f502611fc67fde10939b3ead12236fb 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.hlo_module.pb
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.hlo_module.pb
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:07d09f90f30891e710663ebed6df45c1a6f95b261a9e7bc53dd0bf8fb37cab9d
+oid sha256:50905d847902794f428a8974c7081027044ccf590faed67f135165ae188f2cbe
size 82772
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff
index 3d30f59e0104a26d36d46bb78be39af3726702a7..6db762c113654eeb7147221724416eeeb9283ad2 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:04f04de137a9197b2e345c6e043d111f01bf2b2d5a54fe7ecdcce559bed09d1a
+oid sha256:39dd5344c050e0a9146e3da4a63199502cb70a777fcd6f404951cbeac6b1bc43
size 267264
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.hlo_module.pb
index bf00ca0b127128e16e4289e4fb5c4a4b3d0feca3..5c3d23978d57f35a2c0b82537a93556ec704c4cf 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.hlo_module.pb
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.hlo_module.pb
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:c9cb3b1a05b77d5bb908b7d9a5344001bec8bc04360d230f2d8ba908ed19896a
+oid sha256:ac0f26bc9da87d849cbf9dc8a48ab0d949c83a820d77932cc2382548e9524118
size 81516
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff
index 36e2fd2e7228867478301e210ccc4b5d287fbfe7..2f4aa75fa01b7205b009fba71d186531208270cf 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:e3ca4a3cc0e9dc7be863a8fd62530378476d4a3010fd01500b88bcf19c02ed78
+oid sha256:8c0854c99e3f026b8e27a598e7c6ca28e763ae3427e5d38a18ebe30a5460b492
size 267264
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.neff
index e85f5bf4d6c2fab180a804e703433d283dbe9551..a3fab36585b13dfb9dc197fcd5f49ab4e32faa15 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:4c9a7d9b1ec1b1e241247ec4c7d5a966b709fd0cf6a419d7ac3c6501561efee4
+oid sha256:0a46e2efa361fae2782d354daff8f4fa4879d710fc2e083b5a05df077050e45d
size 277504
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/wrapped_neff.hlo
index aed0aebfd9079d2a7921fbb1ee00ac31cd9f7d1f..455022f98d458c764ec6f540e725f773837772e4 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/wrapped_neff.hlo
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/wrapped_neff.hlo
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:9e1b44dc49d450390d788fee69a2e3b4b61b3e79956d4efe89ed6fef696940bb
+oid sha256:8f3f244d73f809f31747fa0a6a0b4e8c2bbdc6b149c07fec568930eaf617722a
size 289031
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.hlo_module.pb
index 0a534b6f9b9b13756a0c296de4f4491ce2cb4115..48a1b5c61cbe19e0516ffebc7dbf067fceeee89a 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.hlo_module.pb
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.hlo_module.pb
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:84cc69f930e3e2d6aac7b2581033933d9d5372ffc238b94f014e0901070af434
+oid sha256:8e7613cabaab841ef5afd15ed9ed92277bb2ed102c2aad3b0b94148fc1dc8695
size 90382
diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.neff
index 34ac8d11ea71b6492fb8ddca287dd614b02b1f88..4609d00f841f3a7f3b8461aef83fd54798618531 100644
--- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.neff
+++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.neff
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:8e96e22e545fc320f415061f613201ac019eb714022311d84a90adab4f261c1e
+oid sha256:5b1781c8ddad6dd18d2aee87e86ee8f6518d493ef9bc6ebf84662848c7b14f18
size 359424