diff --git a/.gitattributes b/.gitattributes
index ca23b1e23a58ddcb413fd792c7c12d2776cecf34..0052adf2c50974d6dd02320dc4b54f6160f53370 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -16330,3 +16330,105 @@ neuronxcc-2.21.33363.0+82129205/MODULE_e9a0f2507a4369d1b554+4eedbd9e/model.neff
 neuronxcc-2.21.33363.0+82129205/MODULE_e9a0f2507a4369d1b554+4eedbd9e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_fae17d1d3d800dfb2250+e676fd2c/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_fae17d1d3d800dfb2250+e676fd2c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/009b59fa3cc87705bbb4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/009b59fa3cc87705bbb4.json
new file mode 100644
index 0000000000000000000000000000000000000000..c1cb19fd89d2c950edb510d0f07ef7c53bc28df5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/009b59fa3cc87705bbb4.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 4,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/06dea0fe4ee55a8035aa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/06dea0fe4ee55a8035aa.json
new file mode 100644
index 0000000000000000000000000000000000000000..b60cf9c8824c38775ac0a5fdc47164b852f44e75
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/06dea0fe4ee55a8035aa.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 16,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0ccfeb749c49d4002400.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0ccfeb749c49d4002400.json
new file mode 100644
index 0000000000000000000000000000000000000000..104859b1e2d3780b416da7789d57fb3b2b92f6d8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0ccfeb749c49d4002400.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 64,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0d6dd8f35029b2597ea3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0d6dd8f35029b2597ea3.json
new file mode 100644
index 0000000000000000000000000000000000000000..728fdb0e486926f9538d2c332670fb41cd3cf9e0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0d6dd8f35029b2597ea3.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/12054fb3f8fa1b1fc9a6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/12054fb3f8fa1b1fc9a6.json
new file mode 100644
index 0000000000000000000000000000000000000000..935d5736785ff22f58422256bea2428a54695167
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/12054fb3f8fa1b1fc9a6.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 1,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/26b9c01e8f46a57f8933.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/26b9c01e8f46a57f8933.json
new file mode 100644
index 0000000000000000000000000000000000000000..ebbd7b17f9de3d8c98847456308b9a493ac9a82e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/26b9c01e8f46a57f8933.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 16,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/2f14fea94ed53b3b1d94.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/2f14fea94ed53b3b1d94.json
new file mode 100644
index 0000000000000000000000000000000000000000..04a93e8b9fb9c72110aed26659bee80ecf318c09
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/2f14fea94ed53b3b1d94.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 16,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/34bc113dd21cb74d6179.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/34bc113dd21cb74d6179.json
new file mode 100644
index 0000000000000000000000000000000000000000..183a4d868260d1e63d68fb9e19d7a88aaed42766
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/34bc113dd21cb74d6179.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 4,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/44994da16f213a28fa4c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/44994da16f213a28fa4c.json
new file mode 100644
index 0000000000000000000000000000000000000000..02d15d288b8ff063ad0f33f533f49eb475d08fe8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/44994da16f213a28fa4c.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 16,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/55d254e5fde0dcab0e41.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/55d254e5fde0dcab0e41.json
new file mode 100644
index 0000000000000000000000000000000000000000..0df64fc287ed7fb23b1265271e4aff764e841220
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/55d254e5fde0dcab0e41.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 64,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/5e301b3a72a832a33468.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/5e301b3a72a832a33468.json
new file mode 100644
index 0000000000000000000000000000000000000000..fe5ce9f7eb66523791def85051f4df5d070a1655
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/5e301b3a72a832a33468.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 8,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/824782edf021538e230f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/824782edf021538e230f.json
new file mode 100644
index 0000000000000000000000000000000000000000..77f3ea40ea57ae10c6855ede8ed41b9bf90a0c34
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/824782edf021538e230f.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 64,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8c7328c05cd751a24e18.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8c7328c05cd751a24e18.json
new file mode 100644
index 0000000000000000000000000000000000000000..261935929e3d4bd72e006da09f291608693e4071
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8c7328c05cd751a24e18.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 128,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 128,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8fdc4765f723aa1ce54e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8fdc4765f723aa1ce54e.json
new file mode 100644
index 0000000000000000000000000000000000000000..36b7e6ea1869704e268e565adfb0a5fd6c50cabd
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8fdc4765f723aa1ce54e.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 4,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/99d0c0f90ad212bff9e2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/99d0c0f90ad212bff9e2.json
new file mode 100644
index 0000000000000000000000000000000000000000..f4e83a758a9c3615abe7a88c0c3c919f8c5b5d1c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/99d0c0f90ad212bff9e2.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 8,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9d23ffa4ccbadb0a623e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9d23ffa4ccbadb0a623e.json
new file mode 100644
index 0000000000000000000000000000000000000000..6aa3c24ae879251e30b0c87fd2354a44aeb645ac
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9d23ffa4ccbadb0a623e.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 8,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9e94643f5e1e669914b1.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9e94643f5e1e669914b1.json
new file mode 100644
index 0000000000000000000000000000000000000000..e737b32f47b5165f85ed4e18c4562e12a6c36363
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9e94643f5e1e669914b1.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 4,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a41e0220fcb4438b8502.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a41e0220fcb4438b8502.json
new file mode 100644
index 0000000000000000000000000000000000000000..9634af49362c0758366ecf227a1566b16b6079c3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a41e0220fcb4438b8502.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 4,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a45b5270d576ce1c1a2a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a45b5270d576ce1c1a2a.json
new file mode 100644
index 0000000000000000000000000000000000000000..adb262bfb726feed9a1c483a545181e1e35d078e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a45b5270d576ce1c1a2a.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 64,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a52b33ce4a3bb2a7e4bb.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a52b33ce4a3bb2a7e4bb.json
new file mode 100644
index 0000000000000000000000000000000000000000..9beca28fb029c2831d9413f0296cd4bfb77e1991
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a52b33ce4a3bb2a7e4bb.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 8,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a8ca7ace639199dfc385.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a8ca7ace639199dfc385.json
new file mode 100644
index 0000000000000000000000000000000000000000..5de71287d36406fb891c51501f5435bb3cd66991
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a8ca7ace639199dfc385.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 32,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/aec994126b22dffefd0b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/aec994126b22dffefd0b.json
new file mode 100644
index 0000000000000000000000000000000000000000..b6b61c183cc1a39234f0bc97b915ae1b2916b950
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/aec994126b22dffefd0b.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b0bfc6ba654a35354148.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b0bfc6ba654a35354148.json
new file mode 100644
index 0000000000000000000000000000000000000000..17ab88b9e63ce53b5f3eb8af52cca7923cd26a1b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b0bfc6ba654a35354148.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 64,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/c2f89131f4ebe4bff600.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/c2f89131f4ebe4bff600.json
new file mode 100644
index 0000000000000000000000000000000000000000..9e5851cd85eca64e066a04555b260cff32290229
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/c2f89131f4ebe4bff600.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 128,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 128,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/cade97aae05512df69b7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/cade97aae05512df69b7.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d4e65d52008765c7b932dfe1e7ba50142b05c08
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/cade97aae05512df69b7.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 32,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d0e85bdeabc9387b9465.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d0e85bdeabc9387b9465.json
new file mode 100644
index 0000000000000000000000000000000000000000..897086b125190f70ffa4ba77955b1a2646bf9d83
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d0e85bdeabc9387b9465.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 16,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/e18fc7451c6075c8370b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/e18fc7451c6075c8370b.json
new file mode 100644
index 0000000000000000000000000000000000000000..e1e6f21bc0e52224537e1063bcb331c07f171c99
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/e18fc7451c6075c8370b.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 4,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/faad5ad36bf7017146ca.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/faad5ad36bf7017146ca.json
new file mode 100644
index 0000000000000000000000000000000000000000..a28042acbf2225c3460774fe40ca694ef319e4bf
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/faad5ad36bf7017146ca.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 32,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/037e5e6465c54b15c148.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/037e5e6465c54b15c148.json
new file mode 100644
index 0000000000000000000000000000000000000000..9fa22be7a1db3c8c64289209e049ff1bb8324f40
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/037e5e6465c54b15c148.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 128,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 128,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0a87f0b5505c0205b0fe.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0a87f0b5505c0205b0fe.json
new file mode 100644
index 0000000000000000000000000000000000000000..b7e155c2e214e501ba567db70b1547c5115b981e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0a87f0b5505c0205b0fe.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 64,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0b522d9b8f350f2b9470.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0b522d9b8f350f2b9470.json
new file mode 100644
index 0000000000000000000000000000000000000000..faea60ba6232a617e544cb1033f255bb8bfee267
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0b522d9b8f350f2b9470.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 4,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/15c1d0cd7c43f7529ebf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/15c1d0cd7c43f7529ebf.json
new file mode 100644
index 0000000000000000000000000000000000000000..04866f7256b7d021df6ffb450a87b6a5b7ae7f7f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/15c1d0cd7c43f7529ebf.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 64,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/17cc48d52c0270cf758d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/17cc48d52c0270cf758d.json
new file mode 100644
index 0000000000000000000000000000000000000000..0b1229bb93720505cd5a184b4aafa307bec00e38
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/17cc48d52c0270cf758d.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 16,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/18d3c8de3577b20aa52d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/18d3c8de3577b20aa52d.json
new file mode 100644
index 0000000000000000000000000000000000000000..9206d12769ab30638032dc08d81929f2576ec303
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/18d3c8de3577b20aa52d.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1ae13c97fcec1904a39e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1ae13c97fcec1904a39e.json
new file mode 100644
index 0000000000000000000000000000000000000000..8d9d7b24ecee90f915db7714ec08eabb3852a6b7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1ae13c97fcec1904a39e.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 128,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 128,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1b5c4b51d35b993cfc31.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1b5c4b51d35b993cfc31.json
new file mode 100644
index 0000000000000000000000000000000000000000..0dd1543778a27a5dce5622f6808855b28abb0883
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1b5c4b51d35b993cfc31.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 32,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1c5b4f49558970133763.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1c5b4f49558970133763.json
new file mode 100644
index 0000000000000000000000000000000000000000..b5f86c36c44b50f783de388fa6ae239c9a64ba5a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1c5b4f49558970133763.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 4,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1d5c7a4172eecbff1d7d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1d5c7a4172eecbff1d7d.json
new file mode 100644
index 0000000000000000000000000000000000000000..4bb2f984c6e589577f6ce63b78179892a9247104
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1d5c7a4172eecbff1d7d.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 16,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1dfccfe2000dfb41ac92.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1dfccfe2000dfb41ac92.json
new file mode 100644
index 0000000000000000000000000000000000000000..d6131f01c697eeb14cbcbe7e427a41257dff1c2b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1dfccfe2000dfb41ac92.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 16,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/258b3111ce5ef334fd48.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/258b3111ce5ef334fd48.json
new file mode 100644
index 0000000000000000000000000000000000000000..988036127b47833e0581ed49e8aafb7d6f9d1ada
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/258b3111ce5ef334fd48.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 16,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/28eaa012d62c4703d0ec.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/28eaa012d62c4703d0ec.json
new file mode 100644
index 0000000000000000000000000000000000000000..748250f7ed2ec66d737ecd1550077444ba2ef64d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/28eaa012d62c4703d0ec.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 4,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/29c603c74f22bee8fac1.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/29c603c74f22bee8fac1.json
new file mode 100644
index 0000000000000000000000000000000000000000..4d77b4d9cbbafe1d489309f80609a8cc5f3e5bc6
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/29c603c74f22bee8fac1.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 16,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2abf9a791ce0a27f930b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2abf9a791ce0a27f930b.json
new file mode 100644
index 0000000000000000000000000000000000000000..77d22824626a431752233a917eae46630fa68c43
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2abf9a791ce0a27f930b.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2eb748099169e101b60f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2eb748099169e101b60f.json
new file mode 100644
index 0000000000000000000000000000000000000000..0222364189539b926aa60fd2dae6e11a789a5804
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2eb748099169e101b60f.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 4,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/34cb8707f20bc4e730b3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/34cb8707f20bc4e730b3.json
new file mode 100644
index 0000000000000000000000000000000000000000..0365d4dbaca59d0a0dfeb8710375147c18270c50
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/34cb8707f20bc4e730b3.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 16,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/3fbfa290e3e6c135ba01.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/3fbfa290e3e6c135ba01.json
new file mode 100644
index 0000000000000000000000000000000000000000..0684d9db345ee8b4b3920b450af70ce3d4039f9a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/3fbfa290e3e6c135ba01.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 128,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 128,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4141b852f55b204a3cba.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4141b852f55b204a3cba.json
new file mode 100644
index 0000000000000000000000000000000000000000..4bcec7f313e682211601663ff4663b936bd7bc42
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4141b852f55b204a3cba.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 1,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4291b2494794b836ca55.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4291b2494794b836ca55.json
new file mode 100644
index 0000000000000000000000000000000000000000..2398252139e8f7a60417b250ba1419737600e76d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4291b2494794b836ca55.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 4,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/446652a0cef9be9bb531.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/446652a0cef9be9bb531.json
new file mode 100644
index 0000000000000000000000000000000000000000..17f4a9bdd9772647fedb05c1824e0870316a48ba
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/446652a0cef9be9bb531.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 16,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/47bbcd5fd7a32d297869.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/47bbcd5fd7a32d297869.json
new file mode 100644
index 0000000000000000000000000000000000000000..b2c2ac5bbe22fce1645c529c09de4fb1976d1141
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/47bbcd5fd7a32d297869.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 8,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/50fe43187bcf511c7dcc.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/50fe43187bcf511c7dcc.json
new file mode 100644
index 0000000000000000000000000000000000000000..4c7f9fd24ca129c601866810f214e599294745e4
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/50fe43187bcf511c7dcc.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 64,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/52dc45ae6c432aa63bd3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/52dc45ae6c432aa63bd3.json
new file mode 100644
index 0000000000000000000000000000000000000000..0b8cd474e06d6233c455631e530c59dd6b90d2c6
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/52dc45ae6c432aa63bd3.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 32,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55c3affb72b8680efc8e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55c3affb72b8680efc8e.json
new file mode 100644
index 0000000000000000000000000000000000000000..568beacc4ecdd8b3b907a4aa00631b64936e3e3e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55c3affb72b8680efc8e.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 8,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55d7a6c45f285286169d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55d7a6c45f285286169d.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d31b94f5555b6a3482afd0fb2ef1983b8211d69
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55d7a6c45f285286169d.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 8,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/59b8053b641a7fc434e2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/59b8053b641a7fc434e2.json
new file mode 100644
index 0000000000000000000000000000000000000000..06da23fab24a8be819e5c5262c49991ee0fff1d1
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/59b8053b641a7fc434e2.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 8,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5b51e13e67d6986e6c23.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5b51e13e67d6986e6c23.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d6239b323cac9b9ed2acd97be8d1530fd545374
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5b51e13e67d6986e6c23.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5c0403963e60c09412d1.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5c0403963e60c09412d1.json
new file mode 100644
index 0000000000000000000000000000000000000000..88e086452bc28a3b27e2d58763c1e8bb33e3c891
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5c0403963e60c09412d1.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 128,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 128,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fc13e1481781b0be738.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fc13e1481781b0be738.json
new file mode 100644
index 0000000000000000000000000000000000000000..5b7a436cbd8c30e6790d14a6975e6089e064835c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fc13e1481781b0be738.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 4,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fd59e319005f4c46b67.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fd59e319005f4c46b67.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a56a7adc3d2f37f3eb34a0127804fd55cb15ebe
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fd59e319005f4c46b67.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 32,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6073cdc14493659d4fac.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6073cdc14493659d4fac.json
new file mode 100644
index 0000000000000000000000000000000000000000..b7eef340d9839dd406f60e5e09074f34fd8a0f23
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6073cdc14493659d4fac.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 8,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/65deb864ed5a1df60c5d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/65deb864ed5a1df60c5d.json
new file mode 100644
index 0000000000000000000000000000000000000000..abcd66d053bedca903ba3f9781fc4e33559e139d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/65deb864ed5a1df60c5d.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 4,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/686d10e37a0c31b7c3e6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/686d10e37a0c31b7c3e6.json
new file mode 100644
index 0000000000000000000000000000000000000000..9dd781cc653c843ece86b76c06d2efe1027c958e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/686d10e37a0c31b7c3e6.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6c5a91212433422f7c23.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6c5a91212433422f7c23.json
new file mode 100644
index 0000000000000000000000000000000000000000..f0de5c58cc1e654470dfa23b68e4c71194cac814
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6c5a91212433422f7c23.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 8,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6dec282ed0d4d524c63e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6dec282ed0d4d524c63e.json
new file mode 100644
index 0000000000000000000000000000000000000000..02bf2422e97376c27aaabc1e96320ddfcc60dce2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6dec282ed0d4d524c63e.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 32,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6f0c9a6f105eb4431d09.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6f0c9a6f105eb4431d09.json
new file mode 100644
index 0000000000000000000000000000000000000000..03e76dacffe7d45bcca7c6d5ba10d272c1333b6c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6f0c9a6f105eb4431d09.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 8,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/70bd16b077dea9efa87b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/70bd16b077dea9efa87b.json
new file mode 100644
index 0000000000000000000000000000000000000000..f592c3520baf8acf7c3281c01ad53b7bb239f02b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/70bd16b077dea9efa87b.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 8,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/737b91a39739633287e0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/737b91a39739633287e0.json
new file mode 100644
index 0000000000000000000000000000000000000000..f23317af00ea4a695da65ab007a966050c2634f0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/737b91a39739633287e0.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/74754ef6e14fbc5de558.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/74754ef6e14fbc5de558.json
new file mode 100644
index 0000000000000000000000000000000000000000..6e1a57505d02a0d2a2de6d5ff3f1ea7a3b52ed85
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/74754ef6e14fbc5de558.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 64,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80ad31403a70da43747b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80ad31403a70da43747b.json
new file mode 100644
index 0000000000000000000000000000000000000000..0348476e62387d650567affb4c6b63d11eceba29
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80ad31403a70da43747b.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 1,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/81cdfe238a28fa2dbb09.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/81cdfe238a28fa2dbb09.json
new file mode 100644
index 0000000000000000000000000000000000000000..78aaf9560a2b54ea942230688788d017587b94bb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/81cdfe238a28fa2dbb09.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 1,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/85a0744fcfbb2154aed0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/85a0744fcfbb2154aed0.json
new file mode 100644
index 0000000000000000000000000000000000000000..aaaaad71f3371e3ebcfa572a3d0f19e6ca1dff6c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/85a0744fcfbb2154aed0.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 8,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/864e5a09d4809bcb03b2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/864e5a09d4809bcb03b2.json
new file mode 100644
index 0000000000000000000000000000000000000000..8854f84d3ef91e444cfa2967d6edc352da187af8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/864e5a09d4809bcb03b2.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 16,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8b3823a766868796c263.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8b3823a766868796c263.json
new file mode 100644
index 0000000000000000000000000000000000000000..88320dd0757ab97fc24b8bb5429f5ec6ceeda3a7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8b3823a766868796c263.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 64,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8c5d374a063a595d035d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8c5d374a063a595d035d.json
new file mode 100644
index 0000000000000000000000000000000000000000..32094e3293a12398687689d8ec223d16c9c92450
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8c5d374a063a595d035d.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 16,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8cb6290db413c4bbd474.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8cb6290db413c4bbd474.json
new file mode 100644
index 0000000000000000000000000000000000000000..e91b00f030f4749ee2499e432cd510c9958c23a6
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8cb6290db413c4bbd474.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 64,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8d712d60cd7845642e5b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8d712d60cd7845642e5b.json
new file mode 100644
index 0000000000000000000000000000000000000000..d3211b308a4a86664376bf92f6b040b4b99f612a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8d712d60cd7845642e5b.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8e2b4ab087df79ab0148.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8e2b4ab087df79ab0148.json
new file mode 100644
index 0000000000000000000000000000000000000000..c11bf423ae75762dbf9a2fb010407901032174d8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8e2b4ab087df79ab0148.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 128,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 128,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9059ace389376057f365.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9059ace389376057f365.json
new file mode 100644
index 0000000000000000000000000000000000000000..6fdab38f9eb3953be83713ffb31e32258c209e88
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9059ace389376057f365.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 8,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/92ad09670bfbe78598aa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/92ad09670bfbe78598aa.json
new file mode 100644
index 0000000000000000000000000000000000000000..fea86f0c5d116c2671d46678692da7c10f9a9afa
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/92ad09670bfbe78598aa.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 8,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9b56e73c2b369f3766be.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9b56e73c2b369f3766be.json
new file mode 100644
index 0000000000000000000000000000000000000000..4af191c0f0db05df05a4a5e9b5dd9edb6f1722fa
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9b56e73c2b369f3766be.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 64,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a58bdf757c00fb4913de.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a58bdf757c00fb4913de.json
new file mode 100644
index 0000000000000000000000000000000000000000..1aa9e5dcecb56df62e4848a85dedc232a95efebc
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a58bdf757c00fb4913de.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 64,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a5a74d202e1ba0c4b0fd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a5a74d202e1ba0c4b0fd.json
new file mode 100644
index 0000000000000000000000000000000000000000..fdaddd078d67ba8f4daf9033dc568d534a26c8fe
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a5a74d202e1ba0c4b0fd.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 32,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a6e16d14f73f55435991.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a6e16d14f73f55435991.json
new file mode 100644
index 0000000000000000000000000000000000000000..89c8c6b6ccc34216a6f3138c38f2c40d26f5b02c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a6e16d14f73f55435991.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 128,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 128,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ac0082b3396a7f33f380.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ac0082b3396a7f33f380.json
new file mode 100644
index 0000000000000000000000000000000000000000..2f03bbaea1e6d6c2654933c64e41c6cae9d3e4c8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ac0082b3396a7f33f380.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b30c2c558a761433508c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b30c2c558a761433508c.json
new file mode 100644
index 0000000000000000000000000000000000000000..2cf2c89e79f79c5205c3790a21a7f7bc42568b86
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b30c2c558a761433508c.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 16,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b92f3bad41312155a2b8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b92f3bad41312155a2b8.json
new file mode 100644
index 0000000000000000000000000000000000000000..a6d9c6e1f6d40fa7cc11a013f00f58833669c226
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b92f3bad41312155a2b8.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 4,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/babd36831f874fd0cc0c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/babd36831f874fd0cc0c.json
new file mode 100644
index 0000000000000000000000000000000000000000..e8619e682f7566290e8f403bb6d47684016f7c75
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/babd36831f874fd0cc0c.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 8,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bb17df19f09ba1a7cc7d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bb17df19f09ba1a7cc7d.json
new file mode 100644
index 0000000000000000000000000000000000000000..81ed6ba6a8b2a65a70f690be78d6dcb90c12e07e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bb17df19f09ba1a7cc7d.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 128,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 128,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bf48e90b2f7eb17cff03.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bf48e90b2f7eb17cff03.json
new file mode 100644
index 0000000000000000000000000000000000000000..4f11c663c02fe26853782b8c279b18617590159f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bf48e90b2f7eb17cff03.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 8,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/cb305fa12ee6269ee201.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/cb305fa12ee6269ee201.json
new file mode 100644
index 0000000000000000000000000000000000000000..0d1561c9164ecb8ff81a2eba7318f82f97ca6f54
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/cb305fa12ee6269ee201.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ccc1ca9219f944cb2ddf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ccc1ca9219f944cb2ddf.json
new file mode 100644
index 0000000000000000000000000000000000000000..20f92fc5ab329a72e411928aa7d3bff7716eac69
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ccc1ca9219f944cb2ddf.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 1,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d1f2adb5334c2c85fccd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d1f2adb5334c2c85fccd.json
new file mode 100644
index 0000000000000000000000000000000000000000..ad614dee47ee53b87f7c30578345cd2782a12b2d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d1f2adb5334c2c85fccd.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 16,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d29396862f2fe00fa073.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d29396862f2fe00fa073.json
new file mode 100644
index 0000000000000000000000000000000000000000..b811c3ab4c35aaf9b612a7388465566d96ce9971
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d29396862f2fe00fa073.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 4,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/dfaccf7db532b86dd514.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/dfaccf7db532b86dd514.json
new file mode 100644
index 0000000000000000000000000000000000000000..e2f9b07e03c84487c05831e4cf55b2f41af7c138
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/dfaccf7db532b86dd514.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 32,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e4e9990b6191c28ea1d3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e4e9990b6191c28ea1d3.json
new file mode 100644
index 0000000000000000000000000000000000000000..9c81cb3fa8adb2d8eb8c9ebee1d3da467b9f5c28
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e4e9990b6191c28ea1d3.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 4,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e8e8742e33c6a426fa2b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e8e8742e33c6a426fa2b.json
new file mode 100644
index 0000000000000000000000000000000000000000..6a3682d8c7bc8337f6b8a70da585e79b09c63f3e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e8e8742e33c6a426fa2b.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 1,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f0558983e2906eb0b5ab.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f0558983e2906eb0b5ab.json
new file mode 100644
index 0000000000000000000000000000000000000000..22af7da06d2e5b52395579cffce8b7a79babc3f5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f0558983e2906eb0b5ab.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 64,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f31b6aa37f1188a14d53.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f31b6aa37f1188a14d53.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d0abd972d6f101ad1ed36a11083c53df3c32e58
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f31b6aa37f1188a14d53.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 4,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f7859387812f9a6c1357.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f7859387812f9a6c1357.json
new file mode 100644
index 0000000000000000000000000000000000000000..f77a061127b7f546832c6465ab79e205fc0d6556
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f7859387812f9a6c1357.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 16,
+    "max_context_length": 16384,
+    "max_topk": 256,
+    "n_active_tokens": 16384,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 16384,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f9cc3d2168d195633865.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f9cc3d2168d195633865.json
new file mode 100644
index 0000000000000000000000000000000000000000..4c1d5acabc45c5e84abef1f5f3dcbb516b029624
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f9cc3d2168d195633865.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 64,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fadf26f67ccc2ad82a8f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fadf26f67ccc2ad82a8f.json
new file mode 100644
index 0000000000000000000000000000000000000000..be3f0dc7cf94b2f272cadbf6644dd5268f9609c7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fadf26f67ccc2ad82a8f.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 1,
+    "max_context_length": 32768,
+    "max_topk": 256,
+    "n_active_tokens": 32768,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 32768,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fec7475005632b11ba52.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fec7475005632b11ba52.json
new file mode 100644
index 0000000000000000000000000000000000000000..2e3e99d3c4e50e6c50fb51adaf42a1a53d512b40
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fec7475005632b11ba52.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 8,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..efdea639f05520b34b97126a265ec2b76b69228b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17b39f8c8ede97056dbf1cb4c157201cdd1b3dc9c4c4148c372238dac197dabe
+size 781744
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..ef497462de8957836e6e19933382f4db863b4ed2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc0b3d02ba2f12392f8ff13a9bac92917dec779134da0ba5fe17cd77d002ecc4
+size 93686784
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..51bfc0e7703ed7022db8bbc00d05ad6a0a61b013
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c343e08b825af3c705c988ed225312d9281ccf8d2a025b82a5feb5ee0eda733c
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..01f1f8414c871ea22a3659d12b1fc0e310472727
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.log
@@ -0,0 +1,10 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_016ddc6b7463af23bca0+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_016ddc6b7463af23bca0+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: Process Process-1:1:
+Traceback (most recent call last):
+  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "neuronxcc/driver/commands/CompileCommand.py", line 1328, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline.print_dots
+BrokenPipeError: [Errno 32] Broken pipe
+[NLA001]  Unhandled exception with message: boost::filesystem::file_size: No such file or directory [system:2]: "/tmp/nxd_model/encoding/_tp0_bk0/neuronxcc-jbjstf3g/sgLnk/sg00/SP.bin" - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T13:03:00Z [Errno 32] Broken pipe
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..40abf8693db25a09e22f7018873d7349a416cff8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c72a50b70b6f8c57d4cbf340d5ae6337b5bf294cd016fa63e5ba34d9d1e9987f
+size 840640
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..023023f90c5498b40d3f43545a97851423e1af81
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba69cbd35e068d83852f182df9cc367124ef09435c1edcb9a3508935b72c9c4f
+size 108749824
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..82f267dd197dd8829a1ca54266b13b89d70e5817
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c76bb3149199522b006a2beebd0588443b40ab808ca68b19e05f30c9b787952a
+size 846751
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..c03d4a5d48ad728d5de9dd19ff1e8738433b5b40
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_03ed4b3598344bc45191+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_03ed4b3598344bc45191+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T16:40:38Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 616929102397440
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-09 16:40:38.383906: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22036949508 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..3f8253996f8fe1c07fdd1e75c2d308c9174c23b0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d97ecd244ab89b30b527a4a373918f8ca1c7c3757b597aac84e2308bb7644f35
+size 848822
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..499f3e68ee743e64cc763dc616b3b196b3d62d29
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_06a49641d496eff74287+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_06a49641d496eff74287+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T17:24:22Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 2157447972126720
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-09 17:24:22.298526: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22668980354 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..4494299d2b54049f9a288e2266b5f81dcc391067
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7067a69191f11142abcac660df873ee7d51f397a3013c7c85bca230366e62600
+size 626221
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..e507525c9fa654c1be3343f21545573036a36d7b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.log
@@ -0,0 +1 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_088b4abdeb935312f5cc+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_088b4abdeb935312f5cc+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d620c520f7b9ec0c3eecbe7ecb0f59be716b2664
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2019a93809269243a0e89e3f6a01964c47bad5086315970256491699d7a65484
+size 774422
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..7c1aa303d8693d819239899b11a626b5d1c6ed79
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e53883517848067b13d15b5408afce160c7dcf97f25ee1b1f56cc7c7d1e9a469
+size 12166144
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..73bb027d1691d4a2d006f4332aaa47585c9f1bdb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1ba396bc76287eb0372b4ce315e793865b52d14ce7f0b873fbec1d83b857e3d
+size 618697
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..96fb618f0d5a0948a4b2a86ee620ab0d0f710897
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27fcfd5f4aef962bcfe0dd93d5ae52cf21e45d445dd4b5eed000e826fb142b3d
+size 44432384
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..07473b1d3a8b41834281798513a9b09c9cf0778f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e85caebad66f7af0d4349a72e3965c9f17932965a1183e79f3905ad48f15470
+size 619281
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2e8ece9c155008779d6ff3a47906eb4a65a108c8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a99e45276abb679987d9c265231d6b0c7356bec94161f51b877c7966dcae270
+size 11531264
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..78f4d522c3bfb2aa90a43821b5208e9f35662e17
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73fd03e5d2c760ca5c15120cbdf152034bf8fea70e26a7253b66b8a50ac90940
+size 775286
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..1eeb8024b3395efaa32e9a7c52c9ab3c77a6c2db
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3c6376fff4c7849cba41f46b62b742375f4ed372ad632e3cf40dcd54680672c
+size 23727104
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6b459e48d4bf274e02e2f3a902fad7d9ac8a1e74
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9288525354ae6d6d403c09392b2e11323616ca23fb6d8937eb8ba2dfbfc7fe7e
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..80bc8fdcee97f125609b8ce4b978e17920496e10
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0def30361c2a503f43a7+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0def30361c2a503f43a7+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T13:50:04Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..1e37741bb83ff5b87863194b71f73d5b68249ff5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cb141c6e5f09b6aec1932aa2922b11ee9aeadb07dda35fe0cae14f6f4271a60
+size 857165
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..ff6c89cee58b289422dcb1edfc30bf961315962b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.log
@@ -0,0 +1 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0e96f0aeabc8c05c5cea+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0e96f0aeabc8c05c5cea+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..dfbfe48f894a969e491c8297fd246d4adefb2f83
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a93fa8af1e0c0824afc8d4d920d07f690271fc7dd4d4b002a8187e3ddbb40cfd
+size 855517
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..f03006a0703f99e1b77c83e86d10761feac7e4eb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0eaaad5a2f067cafc190+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0eaaad5a2f067cafc190+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T20:55:56Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 794912547143680
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-09 20:55:56.656334: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22617826114 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a06f491d7d910fd2ec7cf902bc1d41cede682270
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf2830b2b48980845ad6e95f9447c96fd5f455a85d6ce8e55ed97cee7711dcfb
+size 635925
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..b323de209b422ac6f91b4b4ace0cb31d1f9e4a3c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0ed7f068c9d503e4a065+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0ed7f068c9d503e4a065+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [LUR015]  Compiler generated too many instructions (5808927). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T08:54:45Z Non-signal exit. Backend exited with code 1 and stderr: [LUR015]  Compiler generated too many instructions (5808927). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9b339e45c23409dd194b43db157a102b4e351c47
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79e8e336d39559f3b2d022f9ced532b9e615dd9654ce0d4abf9f540b3bf776d4
+size 619065
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..1e1825cef8337e7216c3ddb5ba0d7e2b112b7ac1
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:226ace9d6d26ec64613019920f62d09237981fdd897276090129eafcbd17f6d4
+size 12585984
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..226529e631b1a8f48c605e375a8de0195b6208cb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fa4bd283a4889514f87eb412b57462a4c42bbb896ffed2a3f69aee3492ba1c1
+size 32543744
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..55d17dff428f9891c86a423ab90013c16849a57d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4844263dbfe2d31a6cea613508f23c8c7499a3ac7dc6ac30adebae178db55caf
+size 628841
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..a4d4aff68a2d7329e86d74679ac4278bd2fd2f17
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2de3569a6fe3533fd014a55388c2794d93db78f83f6a26cbf5563fc26c9660c7
+size 92263424
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..bbc23aaad77f9db89407242bca7e5507d711fa2c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81315b651b9b0de71acc007000feba008ac6612f7093c71346746a779c29c663
+size 848819
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..307a2e035da05fdfc36800a2619d9e077edf2544
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1242df1c9c717d5ab4d1+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1242df1c9c717d5ab4d1+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:38:11Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 596485058068480
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 12:38:11.044086: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17496556036 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b88ec2cc6be2bf1f5c2ee8dc3ea4572786edd94e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15e7d18d267b036e4a3b3d5ab592556bab5c6e5125aa84f96eba39c06eba0b1d
+size 854933
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..efe10f86d50a55fa7ea1bbd0e0aa7d095473cda2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_132472d9d56d1f6e6341+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_132472d9d56d1f6e6341+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T16:39:58Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 4314895944253440
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-09 16:39:58.774411: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 45234335874 bytes (42 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ffd88c74c4d1ca21a5e9d6a3c219cb287bd87a7f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19c4e4ff3e23ae7a276f13655e7d9c4248d2e58c608de8d97b10e9e313648e81
+size 866231
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..3b46426c896add27c0773df7acd0b15488eebf05
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_136817b0b36ea4e43764+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_136817b0b36ea4e43764+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:09:18Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 24013333950627840
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 10:09:18.156141: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 13029571 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c12b6d443b6a410f669a8c4e45a89b2365b57b9b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:642febb32fdc03356ba70a288f89de64ebc8f0c0a70ac2dcc357538ed8e000b6
+size 848819
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..abfe6d557bbb7c56314fb00de848a9d403ea52a4
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_18ed09d734be56c75027+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_18ed09d734be56c75027+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T18:29:12Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0ac971d5324bedd274c62297dc1457efb177db91
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49dae5bb2894dc0c7ea22a320b4b2de1774e3f5b385da2dfd4bee2d6f0ab9d0e
+size 855517
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..fcd33376d2c690d8f614fd781a0d4de4b77a9e32
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1a3939beb88e18a29e1e+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1a3939beb88e18a29e1e+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T14:28:34Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 1231453023109120
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 14:28:34.090499: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 35534210050 bytes (33 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..182cdccfcc22d9762425ba6c20369b9ee9741301
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ae3632a5072d91624b76d190b5435e08c58ad0d694bbb070627e66714c00d34
+size 854933
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..16154c5faeaadefed18d6cf62f4c0a339a10bdfb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1a41189de75fc0ed15bb+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1a41189de75fc0ed15bb+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:45:07Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 2385940232273920
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 11:45:07.082290: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 35616056834 bytes (33 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f22a51f351d0e1255dbadfcc83d077fb62aa5e8d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17699cf211a5420bbed1471be879e15183d7f4857245f9071b7690ea8165e192
+size 626221
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..1adef620eea40d4f028faf45a526e536816cdae1
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e193be0c6a9aeff099b75d11e438d7658b61e7d9e2b3a0648dc16195f134173f
+size 23491584
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..dde8ce32a1a0da51d58eba59b673ea1dafb478cb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6c082016fb78c0694a6b26131ba3769d0366bdd1be75fcf052dc0599f9da811
+size 847335
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..5237fb26d1bf135c966e249cde74fb270ee57951
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1afb27ccf10708e03947+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1afb27ccf10708e03947+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T14:28:53Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 538760697610240
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 14:28:53.907897: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17809081346 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ad82de52fdcda7882a6b0071fba1246c8abcaafd
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d531d1284d95f69f59e96f0f0b3f90c267d794dd73287f9c6a6f3e423b62c18f
+size 840640
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..c0c458ce4d789162c5f6aaab8e4b09f1778c60ce
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1b1cca7c4f2510fb0052+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1b1cca7c4f2510fb0052+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:10:08Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 519519244124160
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 12:10:08.065672: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17496556036 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..29e25c6a7dc05cfb1d211cf3b58143f1fcc5049e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41d65491ea0ce378fe4c09ec1896a2f246b6036feca7fcb1b76b78dfa36f7d79
+size 866087
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..d97be917389918f9fd9103a09c49c656dd6f8e8c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1bbbe37c501b4a4bef6a+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1bbbe37c501b4a4bef6a+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:09:44Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 9543760929095680
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 10:09:43.960123: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 6514787 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..96bf2237230d77d6828fd0e3bcc3719d467a790e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:181a67a9bb84283f0586e9c8de0b48cb6c2c82a60455503033a79b0095a44338
+size 781521
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..93da5e8bf1a4ba9fc3be7e8ec480ace4ab2f7f56
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6856af256c6be61e29cb285efcb892115af757dafe16df88291750588fbc5f1
+size 15299584
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..10aae86bd7104d74b6e509af35c4b28dd5b41749
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7bcf89b72fc6c860bab459c3165b11f81494f1b426956fe68a2a8e10dcd2cb5
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..367c5f92cb61b3afa06cf9c27f6fbb0127282d26
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdd66e9b9b8ba0dc67efedf910ed5909e1c7127c76698737f071e2384ff80d2b
+size 44104704
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8a38e6341b98b39c237ed804febcf294b060fd59
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f0d7dd30066f1f3fec6a55fc99687b04d38571ccd535769980445c0ab375a98
+size 618478
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2f30c957f919ea0ab1aed62c5193893c9de0b239
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5b12f140b606972dcc79469fd77c52e0e8f1d687258d3552cdc1dabbc8e0487
+size 34694144
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..018dc5ac6723d9a590602e87bbaf3ae0afa1b59b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fd6c4a2f2f52910be353642f920b75b5c70028dc2bd1bf23297e60672c73f53
+size 618697
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..fa4c1a9db5bd2e3995fec17513f55327578cb2dc
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef4d7f38346c34762dd26d3898ef07aa88158ec6296436248646c3066b25475a
+size 16006144
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5107da4fec9a51ce03b3859b6edee996c954993f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb3a5ae6a70cd1a89311771086b96da6d3866308a17ce66831077dd386566f37
+size 847335
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..248527bc3bce829aaa09a0aa5552a5bdb1ca5eaa
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2199f1e99b44680fb5e0+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2199f1e99b44680fb5e0+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T21:12:48Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..326e244109914c388b3585149811e879d9c7d9ed
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34bff56920eb421263097551215c34b7a72bef0fefec7c8d1dc5421a8e550976
+size 839424
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..188dfffefb9a29243faa6a3e71dfb2a0e42b97b2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d530e15cb554137bc29294d99e0ff0a4115c7b28036e97a1eab7f3c60794c0c
+size 34110464
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..061218797e3c618c71e4641ce6ca0b82ea9805a1
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53e38e1c6ebd3342f8665943284d0d9eb20c5be013901ccf3d70cb4d7b858f49
+size 782117
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..c6ed05d6da89d1f9f2ad3ae3ab74474be9e40029
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_246c7eaf3ccadeb94412+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_246c7eaf3ccadeb94412+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-10T00:18:11Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9b55b8fa90c1855e7eb820a015dc4b33f02d4413
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce7965a675031fb6ab8179087e299eef62fde401cc47e3276351e8e7ff0f2022
+size 848819
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..5db499a1ab63def59bcdd193332e828b4145e756
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_269a21b3e3cb06fc787d+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_269a21b3e3cb06fc787d+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:09:47Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 1192970116136960
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 12:09:47.166183: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17890928130 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..bde75a0f775e9f7f040576c4ba25b4e491cd2f53
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b52f889cce4bde7411563e34e6e026579b32a5112a22176fb6f304becc453f16
+size 855517
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..24b79eb0eef7d4ed38857c087e6ba65a5daca335
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_28bdef19cd14b2d3853c+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_28bdef19cd14b2d3853c+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T20:55:37Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 2205551605841920
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-09 20:55:37.125239: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 45183181634 bytes (42 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e5ba6488288a292cc7859460952e33210eac0060
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:489b8b60af9fe65fb3c4e3ae66fe146187b9eca7f7a7cf6fbc6374b96ea26a55
+size 774422
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..85984aed7c325cf25969ebe354b6c8b9eec2f259
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e22d220ea0bfb979f5d149941f5fb80edac8a835063e60c8bc20a8270c60fb0
+size 124980224
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..71293912e7fdadd3b6894a6af75ed3de6f4d06bd
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:495ed5311d0baedcf0b69ddd8bcecab8ac1c1aa0efc36dddd64ef1a5b35ce65b
+size 846090
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..f06697ba09fac836605b27bd072522e60ea85966
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0227ceadb981329c6b187032407cb9e302c52de8887a5f5af209e721153950b6
+size 31263744
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..2bad3e71b2a8e1c2697d7ec0d03d2168ea13f4a0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb2a0b5e818b8f41bb54320b5c41942dd412b2f1968e1c7f98e3627147f603b7
+size 782241
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..9f82e59b6cd490ece27f970444e8ac96dfdf6aea
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_298f485422f7fe2d7228+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_298f485422f7fe2d7228+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-10T01:16:38Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8a0e70c22379dc01fe970dfa4da4d1d24e677ab7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e352f7aabbb8f32935bfb9b0e864fe5f72058b1e7242ad53bb67a7a5ef8609e9
+size 838840
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..f1857ccfd9640fae3f5d531c6296d2a70bca685d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e56cccf10d83902de77cc52025ccd12d7aea3e4f9d2faacf8addb7a8cd832591
+size 64523264
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e0ecbba554f399ad213e46f5c71ee73f4212e509
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b04f4307ae0b96bbc8f7ad36629cffb7a1c8454c6c91722a79f9780cfd4659a
+size 550215
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..98a0d5f18f389c845e459887946c3789eb5acb9a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b39f286e045b8038e03b10798014b7d88a336035972ee0710bd1e55661ee7d88
+size 14142464
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..09860c8d31968598b97f3869bc1c74ad26ad3405
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccb95029b0a9b055283732ed68dea01b1c51e3524237f9af21587bffe3c6b13b
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..95147992da3acfaefa708c2a778fcf876ee5b71f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.log
@@ -0,0 +1,7 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2bd1d2a32a0abd395f17+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2bd1d2a32a0abd395f17+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5966364). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T10:30:30Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5966364). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..364b643e63e72bdacc073b9e797c6881144554fe
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:860bd5e1809787b36e7156e45530230ed6812cbf6aacf85de3408dfb426f4958
+size 847459
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..c274324c50b2ae069cefd480c780baf4572b3ad6
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc008453b49e30d34158d64a0b25f3e49b33fe8a3f52f4350c0849ee7b7929ea
+size 91505664
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..40807c7d272f1ee86970a9f2ae7b714754efdf08
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29b9840e09349a912595d64582b179a2a28d421d19d291461b13165b193e3457
+size 846751
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..c0835d9c418641891491732a4b24f0b48c170bed
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2daa002f56d4d548461d+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2daa002f56d4d548461d+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T17:04:31Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b4bb0b17f925a4978f4c3bce062aa9f0672d1a8e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e526d7a2beeefc85fd089bf82ca9ccea2c7df35583681daadb43b95a6af19253
+size 625637
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..d0f3a0f89321108d91665306eb7d75f4838afd43
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d68832b4412d19978ea734fc6cd143e0c68c027ac1c71d7cb5fcb849c1b58216
+size 41585664
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..3ea9589ad128cfc96b35307f8fa95761a86ddde0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35fffc72ca664ed1a983e0fece4078b624c0dd34a0438d38564c1173995a8e6d
+size 635925
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..98ffbe0bdd492884533cfe99680fd293a3ec8d13
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c32854ce61209ed5c1593fb20b3d40b33752b6928005513aaabcb077b0f432b
+size 77589504
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..98361beb56a7288fed4c68326d4bceffd7bd9d3d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d89a0080abaad7a6e413992a4eafb83aa9d7032080ed4552002aefea8bf698ce
+size 550215
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..e0d45345791cdf5ab90edf1b518e78a2b0030af0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8be6e8db210047b0539458988d3f4809e28f37a254a6a723eb331e790c40cae0
+size 28806144
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..89ad041225086ba9212de66803e1dcd070fa8a3b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15d5244b1e947fc69cf989d0915e468b9ff2cea20fcf861f0330adaa8329d05b
+size 847459
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..6cac80700d6363a905d5714ec60b7adf99856ee0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_308c6613b83f2baf8c2f+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_308c6613b83f2baf8c2f+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T16:34:49Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 307863255777280
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 16:34:49.617242: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17473487364 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c63d3f44fba51103ddcdd5689491a113e5d7055e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2884ad2f04fff8d288bcd367cc5d47f83b5c30c757b4fbdddc35060e212dc62
+size 846090
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..89f2aa6e28c4b316d3f096cdd97c61f76ec97a08
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70b4a842db789861cc15ab17cad04f01f2dc567c54dbdafe5951b10ff7f0f6ae
+size 33250304
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..794087bbc99e0d7a6843622ecf50765993469055
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65bf9bedb5587b4938397ba2e4797e65c68a5365329c8b664fa8cf4a9defaa8c
+size 550218
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..b76f99b542bbbd3d1c38cd260af7c98024e4af5c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6def46dee245c1b45e1c20aeebc7bac40046a0ec92f560c19c9bf3cd6a2f9a2b
+size 19170304
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..bfb6bc84c7352fc8b59f6e1b09e546bca5ce8640
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:480b58bba49beb061729eb56262e0e2d59f10a3acf7d61e00bfbcb170e47fea8
+size 789219
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..490be3bdb2775fd8512e27da4e2ffd9f84c2ac4e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_348fad79553e7f761d69+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_348fad79553e7f761d69+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:02:35Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 4819
+             convert      1055  21.89% ################################################################
+           transpose       687  14.26% #########################################
+             reshape       478   9.92% ############################
+            multiply       363   7.53% ######################
+           parameter       328   6.81% ###################
+   get-tuple-element       324   6.72% ###################
+           broadcast       262   5.44% ###############
+               slice       255   5.29% ###############
+            constant       223   4.63% #############
+                call       217   4.50% #############
+                 dot       181   3.76% ##########
+                 add       145   3.01% ########
+         concatenate        74   1.54% ####
+               tuple        73   1.51% ####
+              negate        72   1.49% ####
+          all-reduce        72   1.49% ####
+              gather         3   0.06% 
+                iota         3   0.06% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4149
+             convert       911  21.96% ################################################################
+             reshape       902  21.74% ###############################################################
+           parameter       328   7.91% #######################
+           transpose       290   6.99% ####################
+            constant       258   6.22% ##################
+               slice       252   6.07% #################
+            multiply       218   5.25% ###############
+         custom-call       217   5.23% ###############
+                 dot       180   4.34% ############
+   get-tuple-element       180   4.34% ############
+                 add       144   3.47% ##########
+         concatenate        74   1.78% #####
+              negate        72   1.74% #####
+          all-reduce        72   1.74% #####
+           broadcast        40   0.96% ##
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 1150879436636160
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-10 00:02:35.534493: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 45157604514 bytes (42 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0ff9352fe22ac8d0efc3e3003feaa8c0470e4038
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a34c13a6fd956ff9b438786d7ec84b7076bb781517bba6af8d7958ed908951e
+size 628841
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..f62956894f6073c224e5c4ae977f5f4b721fc5db
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6aff3e1b0b7657bfec4a74de0249462fb8ba8bae86da2f4e3935d172b367cf1c
+size 38677504
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d407dfb018a2f1848dde0e5829ab45d88ee3dd4d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a12e90d473fcfeaeaa4c34d7198ae43bf722312e4bb3dde02e0aa879c89afcc4
+size 857905
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..f6a11fa95ec02c6ff62018fd7ce398e5bfd53af5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3b599f77a7291a243623+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3b599f77a7291a243623+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:24:46Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 2467716409589760
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 11:24:46.599385: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 7360512 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ebe3b4fd99f6ed82544ca2d1a7cf46fa791b9ea6
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4362717b57f204f1d833d9665f1eb41bccc86063418c66c629eb4dee2508be27
+size 849403
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..34fa8c72d4ae1aeb30b15e849f40bbe63c260467
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3d00b1f2bfd7bc895d7e+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3d00b1f2bfd7bc895d7e+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T15:03:59Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 615726511554560
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 15:03:59.568826: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17809081346 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c8d04b8eb4857594475ff41b488393c625cd8517
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab5818d6ba0cb53b8bd802313088a4359a070f296b315ce24af0312fd0e99331
+size 850786
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..f1d22053612e9061116303b8a7a4b0c4eeae6144
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.log
@@ -0,0 +1,7 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3f0616426d0d7a52e6a1+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3f0616426d0d7a52e6a1+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5966108). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T10:24:52Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5966108). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e461348ebe1f9cad4593c9053362dfb643424b4f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:725494d25e681e5537c5c33b6713f4e396837197d012d351d6076ded228d5d14
+size 840640
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..023312b8684f899064607d2fdb3bb17e1352444e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3fcf78f378a4928b9273+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3fcf78f378a4928b9273+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T17:47:52Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e1bca3002eedb0745604b5f524b37573b864e449
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fac2cf202675acb45af9e09523b8b7cf3387e8bc7ff29956e3645be5133d83e
+size 775286
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..237b93798a39a5a098bf263ac861903680c761e0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_405bc7c8c866d7839f5d+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_405bc7c8c866d7839f5d+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-10T00:46:13Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..35777c89e844aa81fcfa79dde81e5d5ad37deb31
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f3c20a68200e43220c923a7f184018fac0c4fcf48d61525a6fe5901080aa067
+size 847459
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..8576820b1d85bacbb4c342a7532539824e8bb7ff
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab1619ebbec108ad3879e793f3fda1c4f05340d654e5e595dbb5764f6990d1bc
+size 46398464
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..02555c82e64ba7a1b39b4f806a723567b02f144f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e718cd5a281d4cc41802edaca89d9a2b6bca029cb6b84c80247a55176880c78a
+size 839424
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..55deeaf21beac7564edc6d51f717d21cc369d932
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2a63ece8bcdb93abed21e1ca4f4273e529e175c53f8d9a3d6dc03fda6a00064
+size 67359744
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..bc8070359bf37e4ea9bf2c0dd56d6fc88606d2ab
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f9142edbcc3c0374ee6510efea946285c02c08252438d7b24101d740eaedb46
+size 851650
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..faada8f3cd351c38f28a4cef7f3fe742e1ff3f9f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.log
@@ -0,0 +1,7 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_44d750cedc8d7c07669c+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_44d750cedc8d7c07669c+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5864732). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T09:18:25Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5864732). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0a5d02ac67033b4c7f047dd84379a39dd1a64733
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d942d7a698ac8387a9bb4af5a695cd651da64c9625f7b6993c4617bb4d837529
+size 866231
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..2eeb094886b89935ecc35209fa0e51732d7adade
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_48649c9138561c013aec+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_48649c9138561c013aec+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:24:01Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 17259583777013760
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 11:24:01.912134: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 8900803 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..00a72e8537433d0e6819198b6353f405cb948d37
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5435f6794cadc2a844b609ec2abb51e4d8c50a489f37b988ce9fbd3e02eb7e7f
+size 857905
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..25e1b26b0c7c20a93d055a51c137d429f4ea3428
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4c1a88b20d6796854ba2+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4c1a88b20d6796854ba2+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:10:37Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 1924145348608000
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 10:10:37.591747: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 6670336 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9436a8caf72c15e28ecdc5f9bd5d6750e8fcda18
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:541655e9e94abd28a90a4e6674d3fc5aa1a567dd4a8094053bf68c6be60f8d10
+size 848819
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..3705fdd849e93d049daf3136122f33c110ff9261
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4db147dd7715ebab63e2+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4db147dd7715ebab63e2+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T18:00:02Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 1078723986063360
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-09 18:00:02.112089: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22036949508 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..89fe483c01209c697e885ae4fc181f401f1e424a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1dce75c6fc6d9eb11f8096ee9b76fb86c439125090f203afa28513556b11d4fc
+size 550218
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..66ff4745a33ef955d0af967ed1a73c3593d67ca2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:105ac04fa768fb096110d3909dac92fa9caf3b667e45863f7298dde421f70554
+size 9780224
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..507db62182b74d773308723a4c6047144033da10
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba6d849b415baaa0f5574f817f0d841af49896eab421dc5d4c0edbd75fd147ce
+size 619281
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..9bc2cb554af3542ae4b12531d283384f54814a6c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:039609f18069dfda753a3fe8e34d57b032000eeec0f3c985f457c965dc4c0224
+size 14654464
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..58cfcc9368c5fafef3bf512f19a460c47297eae9
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c5af3c2d175f36b77c6b6aa8453f3330f4380ae3ce24e8b9c811ffd8e7ef233
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..4a3f9e80082f78057f4aa281716b0e136dc255c2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4ec6dc4abe7660ed4aaa+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4ec6dc4abe7660ed4aaa+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T19:28:54Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c6f4a5e767bf53ef9fc22e051961b3d274d302ff
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:353eadc5031d146d2d51e3fd720e3e4d06373f495460e6608efc6e984c5beb83
+size 855517
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..bf3f09e569e4fdf50590bf4e586767b59d33490c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_517d90e47cc2ae1f48fa+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_517d90e47cc2ae1f48fa+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T14:28:15Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 3078632557772800
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 14:28:15.296732: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 70984467458 bytes (66 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f1c4dc67e9147305696c6229b65dcaa28d7bdba5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5bd0e482372e7ca89bcc7081783feebec4ee615497e63f51736751c35b54382
+size 625637
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..6f449cee11b38d8092b9b1d136e5f33171c002f9
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6110426a4d5ee7fe59d6e9e4ab8ecaab2b60b6f80f64c49a207382792b88b22c
+size 32431104
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..79477c148a8f08608f3b76d341ef0a3470331668
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6da54952452a08f9d453a9e9885c2aada25223f34579275c397fbf6039a7630
+size 550218
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..14bc214cbe1e8da4577fada6bd1abc1e29049942
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecf016863d0a103e2b31092ade03cd5b1c7659b4ed8f4291e5aff65bbf01d4c6
+size 4783104
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f94c83c93431eaffeab0760cfc9638fefe39ba4f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9156b961eb2357c5b41ca18949c73bac1ea3102926640f9bbbda5686cb04b4e
+size 775142
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..b1be105d81d9187a4d168fdee8cf51c73f07e766
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f231ea1c66f2f7906a7f63db40582ccee1600652b3ddf5ec526382e2aa86d910
+size 21105664
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..09b9a7637acd6742a33347896941232090cbfcf4
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f68bb11c0776d0b2f7cc25edb0df176b18bbe9871ed3bf0bcbe755cef147a87
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..3ce65ffa5f29b9a99ebd991066b6f0d425b01faf
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.log
@@ -0,0 +1,7 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_5448d3a55a9a90c15ed3+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_5448d3a55a9a90c15ed3+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5865500). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T12:01:22Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5865500). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..57acaadb781deaf6b04bac7d3f18657f4f6ba8a6
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ab1648fdab97bf68365e1aa5973e5b6031c2705415d4fd1b323d85ddf5087ee
+size 859976
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..8e5fd2580dea2eebe0203750cf9c4bc30c26db2e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_54baea067a19d8c74866+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_54baea067a19d8c74866+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:15:35Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 12006666975313920
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 10:15:35.183130: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 6514787 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..dbee33b4dc9b2955634e49ccdf3378526e84f2e0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45db097b0bad3610b615551bfc652b3e6e13944b68b642c64e9d2945f3020a70
+size 849403
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..2ad0e680d2df450316658befa8747c5ad21c4b98
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_54c11cff81559082406a+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_54c11cff81559082406a+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T21:27:06Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 397456273571840
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-09 21:27:06.056648: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22024989188 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..fe33bfbd7eacf1062cedcd0611821c7d9e0832a9
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb04bbd02289dcb571e421b08bf8284ad7eec46ef6aeb00f5cdf020fb291bd2a
+size 556222
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..9743f3a7d93468a4827659bfcccdf368c64bc024
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da42505cf81719e1c931bce3f1de75749aa5069b2c932df7ab07b0ff58b5d8bb
+size 19006464
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..1cf95613beb7a6a9257f56e136ea30c8caf4fecc
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f29038bb3e002fb2bbb8fcbcc6fa161385196f79f7aa57618d98e11309bc5112
+size 619065
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..1736483aa52ca99d68ff2a7b5b3237fcd45b8990
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88cc18a0a66664c97dab5ca3b1900f67ee0891b247f02afe16ed456d569eba06
+size 4148224
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..bf4cc07e93ec71909fbcd7b214e7b8bfac67bfcb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7568e1d92b4c6c400778ea305b47da222ba2c5ce407bc6099eab3b8e1c7218d
+size 619281
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..bee4d0b7a2b4f666c38e0f241fa32aae22a0668a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c24d69006a85c55d02fb750be831a0fd51fb52e3c45cac56571d18a85fbea20
+size 30792704
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ad1a85dedd2f245a3c4bfc76831f2f19323b0a56
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e302451c798a189295934c3769d0d17d4f1faa17589ae2697d667d59c8696c98
+size 850786
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..184fa3a8621bb3672ba94bf3414037713fd2eabe
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_57f9d95694e9df2ed2b0+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_57f9d95694e9df2ed2b0+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815]  Estimated peak HBM usage (19.257GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-05T19:04:25Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815]  Estimated peak HBM usage (19.257GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..75436785803ef6652e209ab17e2b2db4e5b63b75
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcbaeaa0a445532ae79c0ee453aa16b6a8aa7d94acfe2921988957a56298a4c4
+size 850786
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..64b6957adab48461a05c884aed995e7593037715
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82e10fb635149257c9b4be441df10e0d1ec1882e75fb725974d155ec4fb69777
+size 65281024
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9382e5f9ef4eac58b5082a9e63144451d90dcbf2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9afd9b922494f37910fd123b49a349abe77e409cd972f1235a17fd3f9528a78
+size 857165
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..c609c8be6e627e4f583fe3572f1921ec17f99a7c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.log
@@ -0,0 +1,4 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_5c4a6aceb0aa544053e8+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_5c4a6aceb0aa544053e8+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002]  Compilation failed for the following modules:
+  Module sg00: [NLA001]  Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg01: [NLA001]  Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T11:31:50Z [Errno 2] No such file or directory: '/tmp/nxd_model/encoding/_tp0_bk0/neuronxcc-cpisyt01'
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..dc88224dacf536764c49478aa04aa0111c56eaf2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d87768a7dae7c4eb4a2cc7d009ba26d2f234060dbbcf093f60036d2d6468905
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..742346ea342166f16e48edd2c00b49fcffa1976f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8aae3cfdc0d72affb6e2dabbe28ce1e6b11e55b8bdb0b8c00f9202994daa01b
+size 156437504
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..62cb715860ab6518a8e576227a79d7d330783946
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee6ea37b5482e7729e90f838316517feb2d8aa8bf442cc181bff1b91390f7899
+size 859829
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..32c1fe24d1f58b8ed71e4b3994550182d145dab2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6091f5c081b62d1bfd9f+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6091f5c081b62d1bfd9f+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:16:03Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 4771880464547840
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 10:16:02.994627: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 18388300290 bytes (17 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8095c777688ffa9550a5ee224949b912426e306c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f475f3ab2db55cd56a5e08cb304ec1066cfd61338a8d8849e46967447d76a5ef
+size 847335
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..4eef072f85c929efd2cf79054960b6ca178e12b0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_61e5214b0dfedd5841f6+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_61e5214b0dfedd5841f6+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T20:56:15Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 320490459627520
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-09 20:56:15.623840: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22024989188 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b19c143b5895f3737196992fe6f8316a705e2f80
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8b4f2d1b4dd63ec116456bcd5fabd1a789ae137e43116972174a6fa8548c2e4
+size 847459
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..efd43803d5aa956efe152626bad1f2c7deac60b7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_621041d0302133e5da54+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_621041d0302133e5da54+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T16:56:57Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 384829069721600
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 16:56:57.365010: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17473487364 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..97801b2644dcbbcd604cc93393d04d68e4f3b12b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:455c96b407108dd5ed08b5cb8cf14d0837c9bbcc9460a0e90ab3ef9498c957f2
+size 625637
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..4384586ff66e3ed5e22544d4791d7474a62d8e86
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:363463bce98500e44b0987b9a4565b92fd1ad59c6499c707cc89483a6e2e11e0
+size 71896064
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..eb67db5b3314f4eeaa919eac004969a9f2e1b10e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5a83949de28fde124a6106707f8c43c8434bb6c9497076254c57018052a7a49
+size 635925
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..14c27b8697cc0869590f43f207db57c206122551
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_63b228a1aedd2e24d261+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_63b228a1aedd2e24d261+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815]  Estimated peak HBM usage (19.482GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-05T20:24:24Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815]  Estimated peak HBM usage (19.482GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..7d7304fc7ece3982185cb2f5b9ebe7edc5e39bf3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:402206b7bbe5953ab47efcc4da5d29c60dace2ad95797db062b2260f54211db7
+size 838840
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..507c8803dbdd5551cdc109bf271c7a4fe5750521
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:156f94119153b501d98dfc07fb221b3eef7e5d19def64a06b77b99a2fdd8f1b5
+size 32021504
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ea4c584650546f5b8e61408fc4b65866b1d69ef7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99e8bf77004216ebf1e6491d78b157bd39a6fbd792b3f4034b398eef471187cd
+size 782117
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..4011e1674bb8dfca3a5198c047e2d3489c534a9b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_65b8f98e46ae7a9ef982+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_65b8f98e46ae7a9ef982+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:03:13Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 4819
+             convert      1055  21.89% ################################################################
+           transpose       687  14.26% #########################################
+             reshape       478   9.92% ############################
+            multiply       363   7.53% ######################
+           parameter       328   6.81% ###################
+   get-tuple-element       324   6.72% ###################
+           broadcast       262   5.44% ###############
+               slice       255   5.29% ###############
+            constant       223   4.63% #############
+                call       217   4.50% #############
+                 dot       181   3.76% ##########
+                 add       145   3.01% ########
+         concatenate        74   1.54% ####
+               tuple        73   1.51% ####
+              negate        72   1.49% ####
+          all-reduce        72   1.49% ####
+              gather         3   0.06% 
+                iota         3   0.06% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4149
+             convert       911  21.96% ################################################################
+             reshape       902  21.74% ###############################################################
+           parameter       328   7.91% #######################
+           transpose       290   6.99% ####################
+            constant       258   6.22% ##################
+               slice       252   6.07% #################
+            multiply       218   5.25% ###############
+         custom-call       217   5.23% ###############
+                 dot       180   4.34% ############
+   get-tuple-element       180   4.34% ############
+                 add       144   3.47% ##########
+         concatenate        74   1.78% #####
+              negate        72   1.74% #####
+          all-reduce        72   1.74% #####
+           broadcast        40   0.96% ##
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 172271138242560
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-10 00:03:13.732131: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22019009028 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..60cc7de8b34165a659b841089b1be1b59c62e1f3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e69e62f72d5afd9d36424565053111a2fbe24677c272ec7d9e75a8b913e23e
+size 618697
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..cd1840e2a73da7e299b4c359840479d2f9e5b92d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:744ab895bf11348a08e9d410d60645d20e617c71b18f0332ef9c764dff4c728f
+size 22365184
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..82b6f7ed308e2a0e4ab43cd934e6fde341a3885d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:891f2e93aba0e86510e2c390a73ee3e0b76a89330e06259a8f0d581bfdc55c0f
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..0a23cd04e4180bd0a1397c1ae84be192a5ae58ef
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2102ba0c5abd896ea993a7f4842986412d05830faf319e8192b456d2ad24cf3f
+size 124089344
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..14c0bbf05c6792c30447c12dcf8457729430679c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7e94729df7b3fe6f3eb3e5d3cc0de0e463abee799fe527e4b2541dba45ac577
+size 847459
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..0a363429b89f7eff7d0d45b36c300712da4c550f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6ad2615b3c956cc7d737+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6ad2615b3c956cc7d737+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T22:21:33Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..324a4cce76418529ebb16435c22b675ab33f0d05
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f46d08956cdfae8727b5a4e8c96c76beb5087a56b570e3b7f4ff74e25552d64c
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..fdcf720b302fdea6741cc44de95a810af11c2715
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6b760a3a59eae94e3abd+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6b760a3a59eae94e3abd+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T10:38:46Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 4314895944253440
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-09 10:38:46.353799: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 7360512 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..338b05898b1c64778104bf1c477f47d5946bb5be
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bae6eb76e276f963ca3e38872ade2738550a3db59ccda86873333ad34e9828af
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..19ffe74a48d05bd49cc04f530c976bfc6509fb35
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8becfd4f77abceb1cf201019fe0067461a001f83ab161fc060294b1209d0467d
+size 247000064
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..806bad5f4415ecdb9e5c6f18fc8bac2c7a6d8cb3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b86138b4b2d7f4e55d2025a4ffd92d86013f680be49ba958ef9b4c7df7ea9d4a
+size 782241
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..2164a529709974b34cfc97d62d144cd3419c2cf8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_707302ec7fac3bfcfcf5+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_707302ec7fac3bfcfcf5+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:58:59Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 4819
+             convert      1055  21.89% ################################################################
+           transpose       687  14.26% #########################################
+             reshape       478   9.92% ############################
+            multiply       363   7.53% ######################
+           parameter       328   6.81% ###################
+   get-tuple-element       324   6.72% ###################
+           broadcast       262   5.44% ###############
+               slice       255   5.29% ###############
+            constant       223   4.63% #############
+                call       217   4.50% #############
+                 dot       181   3.76% ##########
+                 add       145   3.01% ########
+         concatenate        74   1.54% ####
+               tuple        73   1.51% ####
+              negate        72   1.49% ####
+          all-reduce        72   1.49% ####
+              gather         3   0.06% 
+                iota         3   0.06% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4149
+             convert       911  21.96% ################################################################
+             reshape       902  21.74% ###############################################################
+           parameter       328   7.91% #######################
+           transpose       290   6.99% ####################
+            constant       258   6.22% ##################
+               slice       252   6.07% #################
+            multiply       218   5.25% ###############
+         custom-call       217   5.23% ###############
+                 dot       180   4.34% ############
+   get-tuple-element       180   4.34% ############
+                 add       144   3.47% ##########
+         concatenate        74   1.78% #####
+              negate        72   1.74% #####
+          all-reduce        72   1.74% #####
+           broadcast        40   0.96% ##
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 287719859159040
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-10 00:58:59.107354: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22019009028 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..af67a9657d78343687559592aaf3f73d5ce41e7b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02c3310923e8955055bdc926d0f7a97ae08cb9109d243b06a3c5390962b87f14
+size 550218
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2d6b477ca2ecfc28bfd7ef73ada040eae55dd948
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:669cef4f247802cb9aaa1c10e203e4ca79403bf47d4d0c0ba922413b9ba78359
+size 5039104
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..550eec71897451df89c7f12013e50dfc76f163e7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c8636bfdd8f45ad19de47a746dceced3e2a14101e2253a3b4b9db7d62edc180
+size 774785
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..ee182c404cabe9b55eb6918c4c6b54e4b522af35
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70858608aaec3fae6ef35cf47b9ba0393e30ee52456bafe6b032cefd7d394291
+size 37725184
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..713a9aa1b62b4676ca8e5dca6c39b894a9f49f1b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cc4d2b2ad915e51ac5b7a2e50496585019ee06395d518adc4eb909c014f9d94
+size 850786
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..231dd8d3ecc65e5bfa7c5c97da442e2455895fbe
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_74c7a8ec0cb73b46b0d5+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_74c7a8ec0cb73b46b0d5+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815]  Estimated peak HBM usage (19.443GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-05T18:03:34Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815]  Estimated peak HBM usage (19.443GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c6b09f940f7c8f305995a07a17af1a757e7e1821
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51bbf4db8bea8bad83769e617efec30d70ca53fe58123c313d563561b3fa1032
+size 550227
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..74dd1d8d5ccac7ce47144863e142f2a07a2b2904
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f36ebfe03e25c557478ff71757909e7d4f35c68526af628fc3245b72fec9de2f
+size 6278144
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ea79d6adf14d3bf7dd44b346bdfa7111aa7c5f73
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1adeb107c7b1d52a0e80c21c775ec266dae29c11ae7a844923b275ba9ee20a68
+size 774422
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..f9aab0159f8807371679bd939bfebefacd96a691
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fd0bf3a62c895f30bbbd382a11ca9f85806e269de3d8a4a5bfa190ddaf1dcf5
+size 63611904
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..06fb6b2051e37c0de83d670726219fd6c2af4519
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89691a25cadf456648cf7022db1fd48f3603acd67d62a7fda0002b609613c057
+size 619062
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..58573b5e88f2ccd7943e2ebff3e455ea1dc4e611
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac372f8641c416a8195b7d449f6c22e5375dacf2ec8b1aae1cb94794054e55b6
+size 45374464
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c222ed3b1e52f3649b85dc534f1e5a996c72a39c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b21e615d09cafd59a930c643e6abb941363dde476df24ddecc2d12b67d0d408
+size 618697
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..7fc1a0ef0f3d9d278b835a4ec888c6b1e4cf35f5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6eb18cd4c3b3c36914e28de792426aa9e0a5b8e0a27de20d53eb7b17ce749c6d
+size 18064384
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c0e5ddfbd4a9ba2a72526d2cac95b82f74c6b09a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc10a2c3da10f255700159a331969f20665b37395197756053b88fed2085c48c
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..6a7ed5f94c5f8048102039558ac1a415340ae872
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.log
@@ -0,0 +1,7 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_798b5985b605bd7d793e+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_798b5985b605bd7d793e+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5864988). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T10:41:46Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5864988). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a05eb723ebdda4fd118c29007074eb3a1bf98f63
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c140ea9da4f082f3adb76fb4afce224161acbeab7486844ce9d8c483a379f57
+size 839424
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..44853e71b394e286d9822b4b0b164de928b05432
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5378d08113d4d925b0a142822913b1ca312110d9fdf8aa93446d3a1fd71766d9
+size 36178944
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d720f6dae8800a073caf9fe02b2d57624330d5a5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:373f0d1f96164682ea2279c8ee2d6f582d9dce0a79dfea17bcfa8ca48d857038
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..e0b4890b93fb5b1d911f6f2c351918272708f279
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.log
@@ -0,0 +1,7 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_7eacf49e1370076322c3+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_7eacf49e1370076322c3+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5966876). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T10:35:18Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5966876). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cb068f0ffdf608210208ee83970889db5f142a75
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:155c6e081c7eb1c61ece71fa77412b3525882a84d8a82f0347908930458bf305
+size 847459
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..a64e5290c9e0f9c81d0a8adc321322a4c186c7c0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_81503bf195cd5d2e22b3+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_81503bf195cd5d2e22b3+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T16:34:29Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 769658139443200
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 16:34:29.667895: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17809081346 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a3bb1628c4fc8e3964633add655e0f1bcfac77a1
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:027ee1036810d7b2f04c2b4b972174d500ecc1ce1ce172819d5cf62ca0cd8d46
+size 774785
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2d7cc448e59fcf0fd2ef75d71f5450337cb42fe4
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b2ba7073228626762f8e7f97b1deabdee1b34778fc2b58390163bf587d712c0
+size 17183744
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..61b0189e1f86f52cf30512808521981d014f6c86
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:260dacf0e013367c4a6fa52f5804c41b3cdac5082269571202c1cbf503280f61
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..9ddf6f93227d64e2a37fed267802b3c6e484998b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8175499718da7b5dbf78+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8175499718da7b5dbf78+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:59:23Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 750416685957120
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 12:59:23.391612: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17496556036 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c420d4511c2631712f714541707abccd78a10393
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebded06edfe08b5576eee1338c8a522f0b58b37913aa1e3fc36963ad88891e65
+size 782241
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..b6d50597ef2e8dc2c89a2f095f6b94807e1caf98
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_81d78a45011a5ddd4264+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_81d78a45011a5ddd4264+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-10T01:50:32Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..2d7b18e9929b91487b917b95f3adc6624d76fa8b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dedbdd882e559cf411b70576464063a641f3b0b262b1ef773f800e71a71363a7
+size 781521
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..6fb4bf3830b1fa393b977a704d35a2c3f61cb487
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac58b230d95404f150e7ffc8992f25b39df52c8fc136d19cd872e6b6917eae49
+size 29901824
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..bab374b79cfd49161e3853db902cab017bef0f0a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d101273f956ab4157415a6723274f5750c9cc411bbd7fa0b9aa8643e4d978fca
+size 857905
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..0f8f76861e2912d470fe539d98c6a60ba2a1e353
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.log
@@ -0,0 +1 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_834268a784010ae47258+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_834268a784010ae47258+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0dd27fae856c8e8aa75e07754fde2b02fc2a08c8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0944eaa2aebb4cc1a1e1fe99226aa36df9b4bc72bb43c1112995b5ed337729c4
+size 550218
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..06c5ecff3426ee27ee33a7ea557c6f3e572b4455
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5351b5d8f7ad3451fe85647b5debbdc4086c73bf2c7203b124f96197d72ed21a
+size 9268224
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8ed66b3541df5c426f78ab3877b1fe8bf3d69fe2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d6bfb66b8001bc3bc3e50b6b298e91380d9d5a81a693c2536a90ce09a364994
+size 628841
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..ece52967d6389d8eb6e17b0b72c18953170714ec
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:906ccc485187a84476b8b7d9ff353fa27689c9f5db88efb966af56403ed05f78
+size 46377984
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..82c2a61b1b5e5caaabca5f77cc78cbef3da4d8b9
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e45c384167132d812fe4c7c2ee3cae54466a96795a354027643ec4f3d9650f8
+size 840640
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..77ab4dbf5bda50ee1012ca11ed509cb792281b81
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_87ac40e3234811a067b9+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_87ac40e3234811a067b9+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T12:23:06Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e7c04b0654c790681cf77abb8728ee004b3e3ccd
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2801c76354772f7b850f7fe0ba970683b2ccc6e138ccefbca9689674d6a505b
+size 618697
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..7892eca96983b66dd79d624741e63198df3a276f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa49e00dcf857c33405eb55bd926e393b11f4428d8580432213c66415a2067a6
+size 35666944
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9bad00eb2c6908fedad395cc9d40fa3a3466b43a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0111c51b3983101ed318f88e489be8c968dafd5d246ca313605397ba363ecd6f
+size 775286
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..8445dfc255f59b5a5bebc231005097f8b5922944
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f82dc164ba206a694c8580aaf2a716b28a3e6ec764e24cadafb9df7df37b08e
+size 45937664
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..3d3a63bdfa7b4b29ed4cf063b2e73b350ab0ab80
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d4978f842ca956598128454baeb9e9201a0c6406312261a042ef367d228ffd6
+size 841224
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..e33208028a33036146746284c5d8a9a88add5a2b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a71b6bf8ccf928c1fe739c2e8fb95b83f75f2fad9f7fbc98ec83b52fb814f269
+size 57385984
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..99bb690add4bd341cbad62899cd523be5c2fdfea
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40700a68987dc3b2676779f190a5b41c8318e733bb9a2d9b48b798ecc68b735f
+size 841224
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..375f1f29c27441eb31d39f1d361fea393cd0b81a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8b45b48562f62a010f77+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8b45b48562f62a010f77+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T21:43:47Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d1b4ff82e6045c2a7036640cf8e75a3b5ca57c21
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a326ecb8f0094afb779864216c4e04d23c873cda9ca95f17427f47767bd42d8a
+size 838840
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..725055d5337d4419e636a3dec8bc3609c4c2a554
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb179225a764454cd65b5cf5bc49a43a8b83441dc6dd5433811dc2af489452cd
+size 54866944
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d0c779920f69b38031e6a42125332787ede9382d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf172816c92a0b2bf781d1385078247fcfb5011c30a8b1bf3028261c5d1739e8
+size 839280
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..46f521af4ed54a72492ec156c16edd150cd113ce
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca314ce2c63f0d11cfc1440335e62331a26ef9e57a09d84ca0f8c22dcfec6f55
+size 14736384
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..95eb96496c81c57baf9b71e70bfe5001ba349e44
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f58f33803cc8da67492d0caacd58ab3867d6a509df21377c66e7d38589199523
+size 839424
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..ed8bbc3920ae59526a67ca0ea6916f0bb58c1450
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8ccf7d7e0da627625b86+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8ccf7d7e0da627625b86+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815]  Estimated peak HBM usage (18.734GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T16:45:52Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815]  Estimated peak HBM usage (18.734GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..52330a0442e132bb0ac847101d36f9af526856a8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:193b34e4fbfb73b0e96c027774b7518fc7978c624a2c8d92e76a7b6b110bec34
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..89c2e3a97d33df2d0b30b53503c75fc080ab3420
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8d8d38359e9e9f052a77+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8d8d38359e9e9f052a77+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:22:20Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 2385940232273920
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 10:22:20.765302: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 6670336 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..1d9edb5e1cf2bbbdcfdc8d092e3d62611b42886f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:357a1cde5273dfb5488f1d3ca1a423238a582e7b466abd04ffc56f64971b8575
+size 838840
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..12466c95f51d7c1823cec80658712a654597ebe6
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cb235029c106f08e0e5127c3df479c5256ed050bfdb3f1b7d61986272d5871d
+size 27689984
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..556363d99666586a4066764be08fbd3afca04404
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64d88260f5bf6955ae3dddb08b19e8228d17672b9d527257136a8d37883bb5e3
+size 781888
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..295a5c9458fa88062101ddc62407bc19c85f3a84
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1aef5d7c5a48493b2ebb8448239b22e782b8054012f04edd18f18e3e2e264243
+size 264766464
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5a09b4f851d44e4000cfe47b20781c0c740057d5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6535825e54efcceb9281f6b25d0cde19b16ba7a47b5ada7ce2c5c577bbd764d
+size 556222
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..0ee7d5f233204470122644eb025a8e3fa208e7bf
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d41e80269c45f29160b6ccdda317a4207fdb0b73d30cdad804317c6be5d7cbc
+size 36680704
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..39db8a319cf188dd14a13d412e2ed492abb990a4
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:547f189b3f1fccbbde96bd0f508680b25ab65a63cdb864a1679897737dea3891
+size 782244
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..003ac594240df7c936e42c756017b3f5ebb04792
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_917f77d4c0e9130220f1+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_917f77d4c0e9130220f1+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:29:22Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 4819
+             convert      1055  21.89% ################################################################
+           transpose       687  14.26% #########################################
+             reshape       478   9.92% ############################
+            multiply       363   7.53% ######################
+           parameter       328   6.81% ###################
+   get-tuple-element       324   6.72% ###################
+           broadcast       262   5.44% ###############
+               slice       255   5.29% ###############
+            constant       223   4.63% #############
+                call       217   4.50% #############
+                 dot       181   3.76% ##########
+                 add       145   3.01% ########
+         concatenate        74   1.54% ####
+               tuple        73   1.51% ####
+              negate        72   1.49% ####
+          all-reduce        72   1.49% ####
+              gather         3   0.06% 
+                iota         3   0.06% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4149
+             convert       911  21.96% ################################################################
+             reshape       902  21.74% ###############################################################
+           parameter       328   7.91% #######################
+           transpose       290   6.99% ####################
+            constant       258   6.22% ##################
+               slice       252   6.07% #################
+            multiply       218   5.25% ###############
+         custom-call       217   5.23% ###############
+                 dot       180   4.34% ############
+   get-tuple-element       180   4.34% ############
+                 add       144   3.47% ##########
+         concatenate        74   1.78% #####
+              negate        72   1.74% #####
+          all-reduce        72   1.74% #####
+           broadcast        40   0.96% ##
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 575439718318080
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-10 00:29:22.587343: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22592248994 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6c15ea5ef4779eab2b94774ecb68b76cc13c4a75
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db84619e6d90debc6a0fae8c22d8d22211c1e2ef88e25bc05da015728baabbf1
+size 849406
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..3c58df8d55dc03005658fbaf4f24806ab196407d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_93763701819fe4666319+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_93763701819fe4666319+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T21:26:47Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 1102775802920960
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-09 21:26:47.480495: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22617826114 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..2fa6a966f699ccb6b51e113b5d581ee77974f420
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad493bc86dc595471874d38e94a355311ddae6339ff7c07e5b42ae5d0b80a8aa
+size 851650
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..dc0583e25f98d521ab6768aa007ad128a0c26bd4
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.log
@@ -0,0 +1,7 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_93b8b0c7e8019251e6b6+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_93b8b0c7e8019251e6b6+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5965976). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T10:19:17Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5965976). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..01c6f9098575e4dfa5a65fa6a58790408149a5db
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:446dbce3ac79d663547505542c3aecd07cb61a1d9089ad16f9c5964aff3e5194
+size 847459
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..ae66e07370cbb62f53b859a165e96b03d916a4be
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_94cb050e98e4f6114fd5+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_94cb050e98e4f6114fd5+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T23:03:19Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ea8aea78abbaed9ba2a89cd64b9bbff87573151d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:109b9d025dd23883b21235785d35b9f954caa4b20cde9ec870bee811a4611765
+size 857165
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..6fa80bf310549199f070881c23f0c0a6908dcc17
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9c1a7470b98b717e957dfc1990f816b2a5bef0c371d5a1d6a017e5e13826e7a
+size 132373504
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c9fbc36d42abbe4a4fba5bb6b829df7cccfbc9fd
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9c40b9a423327569ea3f731d7f94366e8ac985c8bc6472096b0ab785eb8a082
+size 628841
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..80a410a3ac7fef3bccfe18b1067c6ef7ad121dc3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_977bee6f36b120b2f4b3+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_977bee6f36b120b2f4b3+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815]  Estimated peak HBM usage (19.581GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-05T19:56:03Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815]  Estimated peak HBM usage (19.581GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..943526674782a44c220ab1aa0274b4403676cb30
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d631ee9fd37e14151f29830356b7a11682916fc8746c3f6274333c81538e1e3c
+size 628841
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..69b1f8b21f1b6fbef1242c6f0b0fe2754d9abead
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25faff5964484e532142db512b5342b1b5b1514ab7f67a2f1d71e31891a991bd
+size 28417024
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8180b85f333a4bea0853ca5a1715f1c6f7279228
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e07f46d157553a8a739c6e9f5624d2082b1a1a0d09b34b31b5e1f658670c74f6
+size 840640
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..94fa9713c6a2f91471d626fa806985a64785d016
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9a666844f4acef43578b+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9a666844f4acef43578b+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T12:52:22Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..96b71721a78709b533b7fa24dccf5e6abff9fc4f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a832b3ba0b12bac416866610d955045724261ee33d6e31c56ee4810be9a529b
+size 781521
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2b04a6146884f3941c2851822ed3bccc3ce85ba0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03e18d0a88fcc2b254f0b37412fcd524591cb9b90b81efcaf97d3d1a44dd39a2
+size 41595904
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8da02cc3c7ba3a93a31e0aa90ef3162a8aa93402
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31d07d6c314d404dddf82480bdee8525232620f2259d260d3a36df9f565479c4
+size 859829
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..dbfd270dea265a632414362fcc894235e9fbb0c7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9c85c604f278ac1fbc3a+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9c85c604f278ac1fbc3a+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T09:15:25Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 3083442921144320
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-09 09:15:25.136381: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 7360512 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d3a7bb2d7eaa4d399fe2191aba411e1a8c1c39b1
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4747986d49d64e7f2993f0384756b5e1c2de90967a683005baa6ddef238fb9f6
+size 846751
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..13f055373a3b47cd4083618da3055861bc124b38
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9de3d9acdfe88b9b1868+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9de3d9acdfe88b9b1868+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:45:26Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 1039038488248320
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 11:45:26.097106: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17890928130 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5e0d7e5c0a8e53423535a1cabe05e71ddb9d1c14
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a0c35681fefd173b040da6b1e5ef998a51eb234a6c5aca4735edf48854e1c52
+size 775142
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..0efdd68e073d694aea2624aed3a6889f1bea9014
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:382defbec55abfdedfbec0e9f3dc3cae25c504a48d2f07ee0361bcfb1a446906
+size 10691584
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..406b5a5129369b33de963397410b0ca13bb612ad
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4338629fb3796c22a3fba7d5dc20957a4b434788bf4454331135ce47d7fa6baf
+size 850786
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..593ca9a9639a023918ecfd596acc64236a3ff2b0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a052a43168c011c69691+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a052a43168c011c69691+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815]  Estimated peak HBM usage (20.996GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T11:26:37Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815]  Estimated peak HBM usage (20.996GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cf413880e2ccb1896acbc655c7f28eb7e68cd9e9
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:173814f91ca84d26c6831960840e82f42ccce0affb4cd9472e29446a8a80aa13
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..10f10087026df16a972193fabd8eeaf662c0d758
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a18da554cd7fccc0540b+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a18da554cd7fccc0540b+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815]  Estimated peak HBM usage (20.034GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T11:12:46Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815]  Estimated peak HBM usage (20.034GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..24c5e49e88686b3e72379106cff16b1e1f8bcc19
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:227fc2170727f24ca0fe429fc6487925326bbb0438d5089a3113a11e61b394f7
+size 850786
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..4de6faafee46f9b6074c292649651dd3b672df4c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6731c8f569d6f3b824fdfe7d4471310f266ea02b3972a51e9dd2fd68b2eca3f4
+size 129014784
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cd915a4a576c3629f01f02ae88f2770b17b91790
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14d1e1b857c913ac50b7a96cbe4e7d9742412e47ce55ae083d53c525a3ebfd52
+size 850786
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..b3651d06597ba14aa4b77924d3698f4647f2709a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8ec1d3ab3aeb21f8d60152ca8cd3f18787f055f4ec622c162c6708f139660f1
+size 85689344
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c0b754c997f61e5f0615ec4ad2a354add32a5879
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a70aff37b915d8d5c648866f14bf740cc985c93a43441c58ca6cba257778e4f7
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..be2661a9fa56967bc304fde0ed27dca1b324d063
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3effcb86799058bbf2cf83f08943889f4bd1a9d48842345a14536b2b85b6ee2e
+size 87870464
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c10fd3c08db40ddb0bec9e5292f676274d1519e9
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9aceda0cac58a1b6c59aa6387641e60dd0a22a7971fb0df6a0bb5633e82adf64
+size 850786
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..6639d8424f69cb656a766c50b5f85dd384ce6c41
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f731be4f5b5f1225232fc08d4c3909c8d4ee6bdf26ca814a55a3417b36c597f5
+size 168776704
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e91edc67c93d64519a7b9f2ff2c7f90f3d66c520
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15100ce39f26f16fcaddd136cd0bb892f850cf5d6d9d512795170a49b80bb21e
+size 851650
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..48c0a72bcb64ebd2b6ec8700ea5f86fe456149fb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a920d77278c50f1829c7+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a920d77278c50f1829c7+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815]  Estimated peak HBM usage (20.627GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T10:03:06Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815]  Estimated peak HBM usage (20.627GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5f0bf956db3d06f6fb01679d340e04571a1bb5f2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cd95c6c3b85b32eee96c58ed5c9afccb7c059a5b408a0d846dea2a75fc9d39d
+size 619281
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..92443cb68803c4d5d754bbb1d4409fb7aecc33e6
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6f82492624d3ed8f4b81d02e0254e685ffdb4d7210136f0f9e3e129091c7d30
+size 24597504
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ba7f544ff0ce4685e6e1435b61b7175923c5c112
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57e4a5ad1a29fc69d1d0b9865e90341f1949a2bd4f2a28d1c8a39c2ed41e0977
+size 626221
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..1264f89e0987be4f4389011341baafdf459f7e2a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:328de3e5d81b41675e00f3c6919cd9bc5a3e973419a81f807044414733b3eb91
+size 30147584
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9312dd2382b7582a55214dbeb8c999c6ac258065
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09b5c6d44187b2679bfa49ed9682578b9220f18b98a991180f7c19d1c0e50c33
+size 619065
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..36186bee599b4ff9c72175b249f4dbf091fdfc6d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6da604473413c4f09500ba4accf0a72d68de685f01e59dc8ae2d89d457c2ab4
+size 15627264
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..adaa5c55013fc5f02cb6f9c7d35be9e47dbd2bed
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a43100720449b64bb211897dcc9ad91049212ce6c47f679f810861158c4a3f1d
+size 774422
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..3f052e909df7d5caa8ea26048c7b90f74cef4dfb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:104d6574e0f283c497f9bb09caf1ce58cb458bbe02531063d7840b40cae65307
+size 143565824
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..1e675bf33fa1b6d90582658b185145c090978c35
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:194246e4f61b065a989c95b6557fb80f7bebc4cbae498a64c0b73dee6f4f9a49
+size 847459
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..8f480259af4d1fd838c050b8d9a9c7a1d7df6e2a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_b2cae69a68a935040fee+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_b2cae69a68a935040fee+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T22:01:03Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 551387901460480
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-09 22:01:03.639098: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22024989188 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..18acc4154cbf38c4aa1352a6433661639243b8ab
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1a83329ed15d27b921b3e12f316c39cc253008d377ebee026ce086db24479ab
+size 550218
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..6377ee1f16b77ccb821f37741836704c9041dd9f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d1bc1f4c0d2ef4c0c95249ce8f3cc4b158ade64ca875dd4050a89e26e663e41
+size 53627904
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5b014acf8624d092db074a5cf2b24281494d9be3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0707d711c256cd0873d82c58f85ea29c1e9038e74d0779e9037877c264bc537f
+size 839424
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..02d7cc5f20e8fef9328936ec392c1930039450d7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a14b33e36767751a74d8b1a7441b6e95fb8b7c0aea6802e105d205d5ee4b8a13
+size 28939264
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8164d5252e4fb7d84c18e5b58889f238269139ec
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa29c402a1f1fa7ae1feaecd65918b147e28f0165344ce53024828ec2a25719b
+size 846739
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..41332b86d0ac41900d87baedab47d0c38f3a9499
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a343364333cb4e15b372d2e573e86a6d38362409de7870fcaf2b53936627d255
+size 23563264
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f12cbd0924dc35dd832f7158737ebf59b2ad3ba2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c885e2099288ea6db6ae55e6a254cf3721cf67171201bafda71ecd3d5c3d7f7
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..93a6a8c2650bdbfc10df3629f0bd24966c98ee3b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_b66cb711b2665f2307f9+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_b66cb711b2665f2307f9+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T13:16:37Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f03ff420a3042c8ab9f4ad3c2a32103867a66e97
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc258e590ae998ae64e89ab0e2830c96ca941ffaf8d619d07b796e7b80a7d5aa
+size 866087
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..b8c8c07ce3c9efc3a22fa8d8371fa8a36682f67a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bdbb42ce2ef79ccbd78d+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bdbb42ce2ef79ccbd78d+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:24:24Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 6166885842288640
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 11:24:24.204047: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22979837954 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e503d829b5d2aa8605a5c2b20dd1b2bf03a3bc92
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f97ff961fdd6692ffad8b49a4ab3ea173d45280f3f2aaa8433e37489c61295f
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..e59d264d173f6f49a408ee9c30c561b12160600d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bf24b4e19296bcdf44f4+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bf24b4e19296bcdf44f4+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:21:54Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 6003333487656960
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 10:21:54.840063: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 18388300290 bytes (17 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8708a21f2586ccde87e80b2168d4417624660a34
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05f27889c431ff3f43f322697301754e25bd65666daa4e3c0fe93401d5d04e14
+size 848819
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..5f524cc2de65462e3ef7abb07bd649db9204af7f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bf6b149e8e5c589b0317+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bf6b149e8e5c589b0317+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:37:50Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 1500833371914240
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 12:37:50.269674: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17890928130 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cc3378be22d48882ddfa6ae4a41a0f1d48bdc359
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69239774522ac2018f8b08b1a63a67d96ad634f685e9ad21d9ffc88154a9bc0c
+size 550151
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..e95fb4fe08e108c302582e245d6aa85c674a42ca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb0b04810240c8f922c56e2963a91dc64b3fd5468e1b3a550985fc7fd6070918
+size 4148224
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..1961dfa58740b350fb86bc863c16b237890945a6
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbdb42072fe8ab4954f3d1be7f178cd0ce00bdc43fcb0e6532fa0db450bbe1ae
+size 848819
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..cc9f091788d129bd1209b7bc8c6c17e88f54c117
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c241fa327ced3c459be8+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c241fa327ced3c459be8+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T17:24:41Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 770860730286080
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-09 17:24:41.125977: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22036949508 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b76aee176485875a79189a7c03afb5c22c4f237d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97002bf1276688b86dbacec547a468b20a39619ad01acdec7afe227d0588d258
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2936d17b1908887f89585d43f13c36cb76e91d92
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fae1b5976f6f6a8be08768e85fb0f398b453be08fc257d13c8164f7af5229c12
+size 78746624
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a946a12a2cbb48ed17cd934a1e0a844a96fd6c23
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:414f56935a5f34465b170e5bb706e4b699508fba33b555fec5464b21cdd2f868
+size 789219
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..07f83f062d22fc1c0373e9ca89361e33a5b02bc5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c49437c751cde6d77d42+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c49437c751cde6d77d42+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:02:53Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 4819
+             convert      1055  21.89% ################################################################
+           transpose       687  14.26% #########################################
+             reshape       478   9.92% ############################
+            multiply       363   7.53% ######################
+           parameter       328   6.81% ###################
+   get-tuple-element       324   6.72% ###################
+           broadcast       262   5.44% ###############
+               slice       255   5.29% ###############
+            constant       223   4.63% #############
+                call       217   4.50% #############
+                 dot       181   3.76% ##########
+                 add       145   3.01% ########
+         concatenate        74   1.54% ####
+               tuple        73   1.51% ####
+              negate        72   1.49% ####
+          all-reduce        72   1.49% ####
+              gather         3   0.06% 
+                iota         3   0.06% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4149
+             convert       911  21.96% ################################################################
+             reshape       902  21.74% ###############################################################
+           parameter       328   7.91% #######################
+           transpose       290   6.99% ####################
+            constant       258   6.22% ##################
+               slice       252   6.07% #################
+            multiply       218   5.25% ###############
+         custom-call       217   5.23% ###############
+                 dot       180   4.34% ############
+   get-tuple-element       180   4.34% ############
+                 add       144   3.47% ##########
+         concatenate        74   1.78% #####
+              negate        72   1.74% #####
+          all-reduce        72   1.74% #####
+           broadcast        40   0.96% ##
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 421508090429440
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-10 00:02:53.813190: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22592248994 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d37ce3c238a9310643ebf0a80fb4512bcbcd66d4
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69f82df38e230d68646f4e3a483402f843bebd5ea220b4a44db806f3037cee79
+size 781521
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..10ac8a7a39b79c2b8cf4b368141803a651c4c15c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b56f524f1ffe52b3a27000929a908525604ad63a335ecfaf1df3fcaf5386330
+size 81316864
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c3cba9180d9b617cb9b3581d7a707d008434fd78
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:665829c36ab0dd72abeb33d0112b5a30edef86ce4f26d19dc44413c595dc8a07
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..f8471e4a15a02602ad8b96d1bece63e3ecd959c3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.log
@@ -0,0 +1,10 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c92d4f9d2dca0587d09e+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c92d4f9d2dca0587d09e+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: Process Process-1:1:
+Traceback (most recent call last):
+  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "neuronxcc/driver/commands/CompileCommand.py", line 1328, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline.print_dots
+BrokenPipeError: [Errno 32] Broken pipe
+[NLA001]  Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-09T15:21:09Z [Errno 32] Broken pipe
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b1c1b01ba1100a21964ca6a0da5a598b5bf1ecc2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb7bf96f1908aeea39937880d245321efa55e88f12453236533767fef4730577
+size 775286
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..b32954a74dd257dd454f71ed673eecfbab7f8e7b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c24274c84859318283c70099e65d1f5e255cd9169510089776865243e1f822e
+size 40920064
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0d53269032545c7560dc0704f70caadcc7243bfa
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b146e0ba6174d46f79fae4f6be805e7983173b240feb0d76d1b2c0b185ed99e
+size 849406
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..55c051b1df6889e1c8c36bc600fa30fe09385ea3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_cc2ddaca0c662ddbdd97+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_cc2ddaca0c662ddbdd97+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T15:03:39Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 1539316278886400
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 15:03:39.367814: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 35534210050 bytes (33 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a618461497bbbd25c554319aa9daf6efd7cba5a5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:354eb59dbfa69b026dfb2b3398a9b2a549ae0069b84f3b1b4af1f9cfbc61daf4
+size 841224
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..27331138f6131047d8af9e259090596856f60dda
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_ccac30ec5602d9f1f532+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_ccac30ec5602d9f1f532+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T15:04:19Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 269380348805120
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 15:04:19.605773: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17473487364 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a86cf25609f120a86e21c596c432ad330494c8c3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:285b84e101180c9d1f04f1506ae2f5509f08786bc0c8f05ccc562883b4025b46
+size 846739
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..35e957ee942a223e34a4e4830a66b54edca32abc
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15f4b4fce665c219271072e4b7732c424124c0faa8cd6d642c0e0d7d843b8f83
+size 69643264
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..57457760db29ff119309cb7cece252ec078cce5e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d5fd467fd91f3b3dd73b6c8ed27b5f9ed16f2bbe3ae2f18fe7fc4cb4995f708
+size 851650
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..bdf027faa7758834b9a72ebac549d8ddeb15512e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d3bce6a0381735dc8108+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d3bce6a0381735dc8108+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:16:28Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 2078076976496640
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 10:16:28.780083: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 6670336 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9cd23ef3519b16f687071fff05d6010279d9566e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d13a7140909b86c181885eeed7df5c2b2d4dcd10f9a7a4a987d97afec5a5778d
+size 550151
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..77da8599835a8c657c3775f789c8beb7f2005bae
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c663c972116a5988617dd8e9506304f016cb4976178c90d6fd3d798a54f1c44c
+size 7803904
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8f757f067d6ae625c67089b650fbd02399793250
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78250c9bb88f3552089e6300b6e05295f6a35aa8a0b99238dce3033e69b04606
+size 857905
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..ffdf9e205a39e692f5fd7197257469e0593efdad
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d6835439f8b0403c5735+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d6835439f8b0403c5735+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:10:09Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 4156153952993280
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 10:10:09.541494: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 18388300290 bytes (17 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..692cb22597201e3da583cca101eab24bb8a296c4
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cf5b2ce1c42b9ffeeae9ec8bd9d00115b48a1d54983088830f9a1f7aa3a9f8d
+size 635925
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..d92ad308a83e71e31138a7e809f435df9cdb2273
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.log
@@ -0,0 +1,7 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d85caf167e83e98cc38d+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d85caf167e83e98cc38d+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (9282847). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5965912). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T10:14:43Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002]  Compilation failed for the following modules:
+  Module sg01: [LUR015]  Compiler generated too many instructions (9282847). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+  Module sg02: [LUR015]  Compiler generated too many instructions (5965912). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a04054298eb25d2cfa7301614a31366738e4e5ac
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9613053a6191cc0d3106c32c958989e02a5e2eb0b4c0dee71dbb64e55098ad2f
+size 782241
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..16ad86b9f3ab266e8e32a87ffe4f50573bbd2048
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d1258d9e9070c4e6fa7d65156dec671c682b1cbf800990350de6c4c61d168f5
+size 58133504
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..24d0ea099fd931153f65d83999f557ed38d1c362
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4878546bedc065fcff030aceed453c669fb4c4fc8f903a648037421f5f1f41a8
+size 847459
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..1494fe941fc0ff264aac05ca6dcbe8aae6089bc7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_da0fb0cd2f564c7a36bc+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_da0fb0cd2f564c7a36bc+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815]  Estimated peak HBM usage (18.886GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T17:10:17Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815]  Estimated peak HBM usage (18.886GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..28560bc931dbf56b229a82f2bdf1ef5d89bf22a7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea6e255f534053e0efd1ec470bd98236196d62091cf71477f2e46c069eb4c1d4
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..bcc4b89198f9818e27c6ec2363d426f79f144b68
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_da9566f57ba46390c838+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_da9566f57ba46390c838+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:27:20Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 3001666743828480
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 10:27:20.729959: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 6670336 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e82bea3735bb8936954c63f979e35312338f4825
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:812be1e7781179bb361b296418f22fb449ee922f041700670404587296e2b9f0
+size 859976
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..936a689cac57666618112dfa80f4249b30704a86
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_db2d12d6296edfd32572+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_db2d12d6296edfd32572+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T09:15:02Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5755
+             convert      1055  18.33% ################################################################
+             reshape       802  13.94% ################################################
+           transpose       723  12.56% ###########################################
+           broadcast       550   9.56% #################################
+               slice       543   9.44% ################################
+            multiply       363   6.31% ######################
+           parameter       328   5.70% ###################
+   get-tuple-element       324   5.63% ###################
+            constant       223   3.87% #############
+                call       217   3.77% #############
+                 dot       181   3.15% ##########
+                 add       145   2.52% ########
+         concatenate        74   1.29% ####
+               tuple        73   1.27% ####
+              negate        72   1.25% ####
+          all-reduce        72   1.25% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4365
+             convert       911  20.87% ################################################################
+             reshape       650  14.89% #############################################
+           transpose       542  12.42% ######################################
+           parameter       328   7.51% #######################
+            constant       258   5.91% ##################
+           broadcast       256   5.86% #################
+               slice       252   5.77% #################
+            multiply       218   4.99% ###############
+         custom-call       217   4.97% ###############
+                 dot       180   4.12% ############
+   get-tuple-element       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+          all-reduce        72   1.65% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 8629791888506880
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-09 09:15:02.622084: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22979837954 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..fcd595558757ed4ba63503609fcec036d58de3fa
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96e8b498776a4fbb8d9d7bc104bee53473c2a833583f9b6abd22a9f84c7efef5
+size 626221
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..80d195846dbb6fa307302c7be47044db0d5445c7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6234c0f758d43878b02cfb741bf081894bf7015f04efa3174f15eb421b201316
+size 50310144
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..76b8ced83e79b411c9290a33a046f4dee6a6ffbf
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:034a00212a21335810951cc0246a799eab1c8d66dccf458647360499210c29a0
+size 839424
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..1a6fcbdf9ad814b13c6dd579c15ca18c619878c5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:775bec48b5559232804366e382f4e5fa33a52d4e94f63c867e0e04612f24dedf
+size 17368064
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..4a0b9c692ecfccf0aeda59dbf506cc9bf77803fc
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b68db39f59e8daabb324bff25d173d4b205554807d24c90152ec070c7ca211b0
+size 847335
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..a38e49b6c649573413275f10a245b5b56ffcf4e8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_dfb053688abb949623b4+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_dfb053688abb949623b4+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T14:29:12Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 250138895319040
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 14:29:12.728144: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17473487364 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a12c86d76468469e9fc929d2dc22b24c6c53dac1
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4515e5e8670a797a84bc3fef1412aa154920613b709b446030d5a366cf1b227e
+size 848822
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..6940bd4ed4cbae95c8c77a0fb115c9f7cd87c2af
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_e0e2410f0584782f6618+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_e0e2410f0584782f6618+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:09:28Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 3001666743828480
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 12:09:27.928190: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 35616056834 bytes (33 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ba7ab511d00d8afe3079146ae23efbec4716e3bc
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a209b2d9dc83dbfa67791a391e8379c8c0285b720767846acb9057962d6a2e8
+size 847459
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..1b853fc73eca6994d76396fed8f3313dbf48ef88
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9917f636b777a9a8e91d819bc8e6bc4141a57dc7b2e36ef62c1d3faf757e629
+size 137473024
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..db9cc783c097cd967d3fffa60d9c2b1c4de54ca1
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f53916a005720b5b71dac3bdeb66d73beac34fd28b8b69a899b700b9999f849
+size 850786
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..27d2daef822f1829483b459a2dee6bf31522f55c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b597e1d53f4b8568751b028378ab36a349692e15e818370eab7b3b044dee845b
+size 82648064
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..24e353a9dd5fc6e650e482485a3d84f4fffea4d7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae24942ccf7f48c898aca83d0b972583aec495e76cb1ab8c28ec0e28ef5f80a6
+size 838696
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..12d6ddf795772de556d5463e06eaa69537913074
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d09cb8780d9a84446562e9fe6cda7d1f72e76808f7eb5e3d471b5d13c68e8809
+size 7066624
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a61255edfdcdffc33c86faa0432bc9eb2e866324
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31573073618141c38daa4cf73c88421971ce336e262a4957dc399fdbc5c64151
+size 839424
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..9711f11c6bf32c53bf88cadbf4726e6d71937ffd
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.log
@@ -0,0 +1 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_e666ad80f7f44ecc3879+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_e666ad80f7f44ecc3879+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5ea39fd7250791a69bf7bbcb54f1dfe26bb1ebe2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b96db805279e3ef7af88d669b824f9024859457bcfdb230fd3daa9edefad8dc
+size 618478
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..1546badd1ab07e99b338e42e04e1f562be084057
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe554cd98ac6b2eed8ab46f385d696af279bba9b4ca3b9b341d8e8d9be9d9b9a
+size 3175424
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..fee9d823cb0f55ae6e9355d6c1b5ceed832113e1
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9d0b73f2290b0ec62e4eedc330950c641a20abcb8effbd099f2b98bdcf3155c
+size 838840
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..040d6f72816e499d137375d55bedcbc4029efbb3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed597668f7cf6c784e20c4c5dbd2374e333e705aa825273abdeac5d5081612fa
+size 16395264
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..504ee628146e415b70e9689d34b95b9c3eb6afbb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e0ed0a4496a56fa746d7da4f46719a638809411f6cbc81c4b97397b18da5d15
+size 854933
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..e457380664b92cb6e47ccffed44fe95e71385346
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_ef1a1a009784d6730c82+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_ef1a1a009784d6730c82+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T16:40:18Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 1541721460572160
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-09 16:40:18.136125: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22668980354 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0cc181bb04b9f78cf089210b60d59880f5eb7b60
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:657eaad6331813faff32c8b7f629cbcec4a99a117868128225dab1204d801e8c
+size 550218
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..0b44ed3c7070831c55a7d7cce31b6ed8e32d6fcb
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:590da2246adc43a4c7556aa95d9e60f2240592fa16debbefa06d00f30a900280
+size 56300544
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..873c0e660b3923f0ae744691da1d3d87a0af8e53
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fc410382b5aa7643170e0208839c95f062ed51f270b4af1e3d1db36d9209e50
+size 782241
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..539d8f278e3fac9c3551e74373157349e5d1d66f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f1438cf8178f932a3e30+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f1438cf8178f932a3e30+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:29:41Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 4819
+             convert      1055  21.89% ################################################################
+           transpose       687  14.26% #########################################
+             reshape       478   9.92% ############################
+            multiply       363   7.53% ######################
+           parameter       328   6.81% ###################
+   get-tuple-element       324   6.72% ###################
+           broadcast       262   5.44% ###############
+               slice       255   5.29% ###############
+            constant       223   4.63% #############
+                call       217   4.50% #############
+                 dot       181   3.76% ##########
+                 add       145   3.01% ########
+         concatenate        74   1.54% ####
+               tuple        73   1.51% ####
+              negate        72   1.49% ####
+          all-reduce        72   1.49% ####
+              gather         3   0.06% 
+                iota         3   0.06% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4149
+             convert       911  21.96% ################################################################
+             reshape       902  21.74% ###############################################################
+           parameter       328   7.91% #######################
+           transpose       290   6.99% ####################
+            constant       258   6.22% ##################
+               slice       252   6.07% #################
+            multiply       218   5.25% ###############
+         custom-call       217   5.23% ###############
+                 dot       180   4.34% ############
+   get-tuple-element       180   4.34% ############
+                 add       144   3.47% ##########
+         concatenate        74   1.78% #####
+              negate        72   1.74% #####
+          all-reduce        72   1.74% #####
+           broadcast        40   0.96% ##
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 210754045214720
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-10 00:29:41.354684: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22019009028 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e9134a813456c5120eb760640074733d8eee90e6
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:941107baf06d12938b01214e5d05d5d16acdb5bf7f802526e594345c30db3377
+size 846674
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..3668adb1c7a65d97b30fd022bdd03c2a82dbea20
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91cd7ddf94aa9c8c2709e91ab41c02d9df24178301268d3419bc885225e6c4a7
+size 17429504
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..aec4e5b364661c21605b6e40d03702c97b84766c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16c23763836dd26e919ad90ae3468b9b403cb7b5837ae6d78855492f82f7a6d6
+size 550215
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2fb1d48b66b9e50b5839c9aa4d37f8799f43417c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7997d0baa4021ac950d1da5ec76554a66304b3df016b09d2b8a29442f06162d
+size 27423744
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c8e5192bb52c3572b85de820fc69c7c7f0d5e554
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:193f0170b2070508c29b73a34147cc0ef41093dfd392ad00c298c497851fec0e
+size 618697
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..82112ee87078785072ed9e0fd1ff2d2138413583
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18386190751b06343cf878b8ab1e08af41835421d704852237573633f6fccb36
+size 20337664
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..09f1dfe2d8cf70564a73a07c239fdb1dccb57e14
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6554b9ae335bcff94287a3935413d73cf41cf2440680dd60901bcd5c2dfd0f5
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..0dd0ac8cee0e65ebf530fb315fad56983a43583e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7c76b9993f58c863f53cb959b1abd9e72294a559ce0f3f31b61d85f48296e25
+size 39588864
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9eeb4fd4a31b1daba0b3764925389510796513a5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1309d272d4b842791724416530cc80bd79c21a9bb7fd75aa2f091b23f3e32dc
+size 846875
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..7d9aa61c942100ee67867954d1900084a3464a87
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92b3329029acbd5b3a0d4d0d909a9f740b14880152428c73c9c746e53443592e
+size 132250624
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c20bc1f3b548bc8ebad0867a810ba9beeb3ddd26
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13aeb85cbd16415a535885cd6109ec2309073297ab6aeb89e0b06ea25538183c
+size 628841
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2d9d210da823b50c28705ed8dfe03e527751e176
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b417e8e9d242f6c0c0087657ea16bdea7b1d38ded56dc12bf71b94878a24f44a
+size 64400384
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6050a03830198e9011d816930f88af8839e4df77
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4ff14415dacf91d87cfa45303337ff5a2a75bc35dd144b7cef9bc0d321ed066
+size 625637
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..42e9fc8190f5bca8d78ad81a9637080762508df3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f93b45d02683e7e074a5+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f93b45d02683e7e074a5+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-06T12:01:10Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022]  DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..149bab8984e023fc8d49795d3ee6c5ada12e6949
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ca45ae226e38b8ed0481e7a7f400c4fcdedc21d707078d6091d0e33a843023f
+size 857100
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..781e54eda6ce8447992800b069a04a9723df4d97
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9be8cf37c5ae13220a17e926bae3a26fdf664413f3b6d8eba44878af2b4c5c09
+size 112374784
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0531aec1e646c1f89f9fa638adbd3437f51e6ca2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea262273011cb56f83953d26495560b143dcab5450f902a3079e42b1722f0c0a
+size 628841
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..d81ef891d15a7f43296b4d411b104a188866a9b1
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cb8d363c05524b270d5981142b54c8964af339efe34a51bf60e7621d23250d5
+size 56259584
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..41daa38fcdef711aee99d1d8fdfb67b95ce0dd9a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b993ed6952483a9fa8b7809777b0a808de959c6265c8baf22fd4fde723991267
+size 846751
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..b1f977ab33f25e0ff77d0eb1429e36584ce56961
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.log
@@ -0,0 +1,71 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fbb4295cd419661a4ed0+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fbb4295cd419661a4ed0+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:45:46Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 481036337152000
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+Replaced 0 dropout sequences with OffloadedDropout
+HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple 
+Invoking RemoveOptimizationBarriers pass
+Processing partition 1
+2026-02-06 11:45:46.843430: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17496556036 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..273f0211b88cb913016afedf51859c57a7c5b372
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65027e7f6d6b6ffe66f22eb81fd095ce1180f3f3c2f3815f136134cf73f433ef
+size 854933
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..ca323139cfb3c3fd81497d3b463321d14f89b7e4
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.log
@@ -0,0 +1,67 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fc374c20b3ea2e1e5432+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fc374c20b3ea2e1e5432+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:44:46Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5611
+             convert      1055  18.80% ################################################################
+             reshape       766  13.65% ##############################################
+           transpose       687  12.24% #########################################
+               slice       543   9.68% ################################
+           broadcast       478   8.52% ############################
+            multiply       363   6.47% ######################
+           parameter       328   5.85% ###################
+   get-tuple-element       324   5.77% ###################
+            constant       223   3.97% #############
+                call       217   3.87% #############
+                 dot       181   3.23% ##########
+                 add       145   2.58% ########
+         concatenate        74   1.32% ####
+               tuple        73   1.30% ####
+              negate        72   1.28% ####
+          all-reduce        72   1.28% ####
+              gather         3   0.05% 
+                iota         3   0.05% 
+                sine         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4293
+             convert       911  21.22% ################################################################
+             reshape       794  18.50% #######################################################
+           transpose       398   9.27% ###########################
+           parameter       328   7.64% #######################
+            constant       258   6.01% ##################
+               slice       252   5.87% #################
+            multiply       218   5.08% ###############
+         custom-call       217   5.05% ###############
+           broadcast       184   4.29% ############
+                 dot       180   4.19% ############
+   get-tuple-element       180   4.19% ############
+                 add       144   3.35% ##########
+         concatenate        74   1.72% #####
+              negate        72   1.68% #####
+          all-reduce        72   1.68% #####
+              gather         3   0.07% 
+                iota         3   0.07% 
+                sine         1   0.02% 
+               tuple         1   0.02% 
+          all-gather         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+
+Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0
+ModuleSplitter initial partitioning... #parts 73
+ModuleSplitter initial partitioning... Done.
+ 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72
+New disjoint wave: start 2 len 70 NumReps: 35 macs 6003333487656960
+First non-zero-mac/used part from the end is 72
+Not enough zero-mac parts. skip
+ModuleSplitter initial partitioning... #parts 37
+ModuleSplitter initial partitioning... Done.
+Remat: gather-iota 0 matches, 0 ops rematted
+Wrote HLO netlist to hlo_netlist.json
+Wrote graph partitions in debug_info_hlo_partitions.json
+Processing partition 0
+2026-02-06 11:44:46.083167: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 71066314242 bytes (66 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..4a9415195c7fb1c0674fe8523caecbd16e6a002a
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0600becadf8552fb59594f02a14f4bfeb250fc3194d33027ebe09840e3affcd1
+size 859109
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2c94374a1bfd4dc955179c0dfb93801405205c2b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:918e6483fa8a5d2cb9bb96eadfd58c9ab4fdcaa3b24bd3c11a7c2c793026a228
+size 264336384
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0361a27afd2d6a653d13896640c0cfefb7d46cda
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb22832d48af2067f57308dae569b77f374614c0ddb1f02a217b3c2a1b6bdf21
+size 550227
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..6f6ca546a3355432232e247a5d0bf03e1c7a4262
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1569e87331d58ac09893a7e7774f29170a805dd46b9c630ba04fb2f9e30fc4e2
+size 66939904
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..bd54c6b6204baca08d172b09e49e68edc6590457
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:371d829661988bdab0d0b51034566239f4e5700d09dfb13785baef443276e300
+size 841224
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..47f7d15512443275481d6027f25410205df2eb6e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.log
@@ -0,0 +1 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fdfc889d20b9ef751c6c+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fdfc889d20b9ef751c6c+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 
\ No newline at end of file