diff --git a/.gitattributes b/.gitattributes index ca23b1e23a58ddcb413fd792c7c12d2776cecf34..0052adf2c50974d6dd02320dc4b54f6160f53370 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16330,3 +16330,105 @@ neuronxcc-2.21.33363.0+82129205/MODULE_e9a0f2507a4369d1b554+4eedbd9e/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_e9a0f2507a4369d1b554+4eedbd9e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_fae17d1d3d800dfb2250+e676fd2c/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_fae17d1d3d800dfb2250+e676fd2c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/009b59fa3cc87705bbb4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/009b59fa3cc87705bbb4.json new file mode 100644 index 0000000000000000000000000000000000000000..c1cb19fd89d2c950edb510d0f07ef7c53bc28df5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/009b59fa3cc87705bbb4.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/06dea0fe4ee55a8035aa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/06dea0fe4ee55a8035aa.json new file mode 100644 index 0000000000000000000000000000000000000000..b60cf9c8824c38775ac0a5fdc47164b852f44e75 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/06dea0fe4ee55a8035aa.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 16, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0ccfeb749c49d4002400.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0ccfeb749c49d4002400.json new file mode 100644 index 0000000000000000000000000000000000000000..104859b1e2d3780b416da7789d57fb3b2b92f6d8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0ccfeb749c49d4002400.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 64, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0d6dd8f35029b2597ea3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0d6dd8f35029b2597ea3.json new file mode 100644 index 0000000000000000000000000000000000000000..728fdb0e486926f9538d2c332670fb41cd3cf9e0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/0d6dd8f35029b2597ea3.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/12054fb3f8fa1b1fc9a6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/12054fb3f8fa1b1fc9a6.json new file mode 100644 index 0000000000000000000000000000000000000000..935d5736785ff22f58422256bea2428a54695167 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/12054fb3f8fa1b1fc9a6.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/26b9c01e8f46a57f8933.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/26b9c01e8f46a57f8933.json new file mode 100644 index 0000000000000000000000000000000000000000..ebbd7b17f9de3d8c98847456308b9a493ac9a82e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/26b9c01e8f46a57f8933.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/2f14fea94ed53b3b1d94.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/2f14fea94ed53b3b1d94.json new file mode 100644 index 0000000000000000000000000000000000000000..04a93e8b9fb9c72110aed26659bee80ecf318c09 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/2f14fea94ed53b3b1d94.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 16, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/34bc113dd21cb74d6179.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/34bc113dd21cb74d6179.json new file mode 100644 index 0000000000000000000000000000000000000000..183a4d868260d1e63d68fb9e19d7a88aaed42766 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/34bc113dd21cb74d6179.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/44994da16f213a28fa4c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/44994da16f213a28fa4c.json new file mode 100644 index 0000000000000000000000000000000000000000..02d15d288b8ff063ad0f33f533f49eb475d08fe8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/44994da16f213a28fa4c.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 16, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/55d254e5fde0dcab0e41.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/55d254e5fde0dcab0e41.json new file mode 100644 index 0000000000000000000000000000000000000000..0df64fc287ed7fb23b1265271e4aff764e841220 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/55d254e5fde0dcab0e41.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 64, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/5e301b3a72a832a33468.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/5e301b3a72a832a33468.json new file mode 100644 index 0000000000000000000000000000000000000000..fe5ce9f7eb66523791def85051f4df5d070a1655 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/5e301b3a72a832a33468.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 8, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/824782edf021538e230f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/824782edf021538e230f.json new file mode 100644 index 0000000000000000000000000000000000000000..77f3ea40ea57ae10c6855ede8ed41b9bf90a0c34 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/824782edf021538e230f.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 64, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8c7328c05cd751a24e18.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8c7328c05cd751a24e18.json new file mode 100644 index 0000000000000000000000000000000000000000..261935929e3d4bd72e006da09f291608693e4071 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8c7328c05cd751a24e18.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 128, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 128, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8fdc4765f723aa1ce54e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8fdc4765f723aa1ce54e.json new file mode 100644 index 0000000000000000000000000000000000000000..36b7e6ea1869704e268e565adfb0a5fd6c50cabd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/8fdc4765f723aa1ce54e.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/99d0c0f90ad212bff9e2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/99d0c0f90ad212bff9e2.json new file mode 100644 index 0000000000000000000000000000000000000000..f4e83a758a9c3615abe7a88c0c3c919f8c5b5d1c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/99d0c0f90ad212bff9e2.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 8, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9d23ffa4ccbadb0a623e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9d23ffa4ccbadb0a623e.json new file mode 100644 index 0000000000000000000000000000000000000000..6aa3c24ae879251e30b0c87fd2354a44aeb645ac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9d23ffa4ccbadb0a623e.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 8, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9e94643f5e1e669914b1.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9e94643f5e1e669914b1.json new file mode 100644 index 0000000000000000000000000000000000000000..e737b32f47b5165f85ed4e18c4562e12a6c36363 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/9e94643f5e1e669914b1.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a41e0220fcb4438b8502.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a41e0220fcb4438b8502.json new file mode 100644 index 0000000000000000000000000000000000000000..9634af49362c0758366ecf227a1566b16b6079c3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a41e0220fcb4438b8502.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a45b5270d576ce1c1a2a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a45b5270d576ce1c1a2a.json new file mode 100644 index 0000000000000000000000000000000000000000..adb262bfb726feed9a1c483a545181e1e35d078e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a45b5270d576ce1c1a2a.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 64, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a52b33ce4a3bb2a7e4bb.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a52b33ce4a3bb2a7e4bb.json new file mode 100644 index 0000000000000000000000000000000000000000..9beca28fb029c2831d9413f0296cd4bfb77e1991 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a52b33ce4a3bb2a7e4bb.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 8, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a8ca7ace639199dfc385.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a8ca7ace639199dfc385.json new file mode 100644 index 0000000000000000000000000000000000000000..5de71287d36406fb891c51501f5435bb3cd66991 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/a8ca7ace639199dfc385.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/aec994126b22dffefd0b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/aec994126b22dffefd0b.json new file mode 100644 index 0000000000000000000000000000000000000000..b6b61c183cc1a39234f0bc97b915ae1b2916b950 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/aec994126b22dffefd0b.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b0bfc6ba654a35354148.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b0bfc6ba654a35354148.json new file mode 100644 index 0000000000000000000000000000000000000000..17ab88b9e63ce53b5f3eb8af52cca7923cd26a1b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b0bfc6ba654a35354148.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 64, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/c2f89131f4ebe4bff600.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/c2f89131f4ebe4bff600.json new file mode 100644 index 0000000000000000000000000000000000000000..9e5851cd85eca64e066a04555b260cff32290229 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/c2f89131f4ebe4bff600.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 128, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 128, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/cade97aae05512df69b7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/cade97aae05512df69b7.json new file mode 100644 index 0000000000000000000000000000000000000000..1d4e65d52008765c7b932dfe1e7ba50142b05c08 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/cade97aae05512df69b7.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 32, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d0e85bdeabc9387b9465.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d0e85bdeabc9387b9465.json new file mode 100644 index 0000000000000000000000000000000000000000..897086b125190f70ffa4ba77955b1a2646bf9d83 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d0e85bdeabc9387b9465.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/e18fc7451c6075c8370b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/e18fc7451c6075c8370b.json new file mode 100644 index 0000000000000000000000000000000000000000..e1e6f21bc0e52224537e1063bcb331c07f171c99 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/e18fc7451c6075c8370b.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/faad5ad36bf7017146ca.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/faad5ad36bf7017146ca.json new file mode 100644 index 0000000000000000000000000000000000000000..a28042acbf2225c3460774fe40ca694ef319e4bf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/faad5ad36bf7017146ca.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/037e5e6465c54b15c148.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/037e5e6465c54b15c148.json new file mode 100644 index 0000000000000000000000000000000000000000..9fa22be7a1db3c8c64289209e049ff1bb8324f40 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/037e5e6465c54b15c148.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 128, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 128, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0a87f0b5505c0205b0fe.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0a87f0b5505c0205b0fe.json new file mode 100644 index 0000000000000000000000000000000000000000..b7e155c2e214e501ba567db70b1547c5115b981e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0a87f0b5505c0205b0fe.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0b522d9b8f350f2b9470.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0b522d9b8f350f2b9470.json new file mode 100644 index 0000000000000000000000000000000000000000..faea60ba6232a617e544cb1033f255bb8bfee267 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/0b522d9b8f350f2b9470.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 4, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/15c1d0cd7c43f7529ebf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/15c1d0cd7c43f7529ebf.json new file mode 100644 index 0000000000000000000000000000000000000000..04866f7256b7d021df6ffb450a87b6a5b7ae7f7f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/15c1d0cd7c43f7529ebf.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 64, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/17cc48d52c0270cf758d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/17cc48d52c0270cf758d.json new file mode 100644 index 0000000000000000000000000000000000000000..0b1229bb93720505cd5a184b4aafa307bec00e38 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/17cc48d52c0270cf758d.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 16, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/18d3c8de3577b20aa52d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/18d3c8de3577b20aa52d.json new file mode 100644 index 0000000000000000000000000000000000000000..9206d12769ab30638032dc08d81929f2576ec303 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/18d3c8de3577b20aa52d.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1ae13c97fcec1904a39e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1ae13c97fcec1904a39e.json new file mode 100644 index 0000000000000000000000000000000000000000..8d9d7b24ecee90f915db7714ec08eabb3852a6b7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1ae13c97fcec1904a39e.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 128, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 128, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1b5c4b51d35b993cfc31.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1b5c4b51d35b993cfc31.json new file mode 100644 index 0000000000000000000000000000000000000000..0dd1543778a27a5dce5622f6808855b28abb0883 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1b5c4b51d35b993cfc31.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1c5b4f49558970133763.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1c5b4f49558970133763.json new file mode 100644 index 0000000000000000000000000000000000000000..b5f86c36c44b50f783de388fa6ae239c9a64ba5a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1c5b4f49558970133763.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 4, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1d5c7a4172eecbff1d7d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1d5c7a4172eecbff1d7d.json new file mode 100644 index 0000000000000000000000000000000000000000..4bb2f984c6e589577f6ce63b78179892a9247104 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1d5c7a4172eecbff1d7d.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 16, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1dfccfe2000dfb41ac92.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1dfccfe2000dfb41ac92.json new file mode 100644 index 0000000000000000000000000000000000000000..d6131f01c697eeb14cbcbe7e427a41257dff1c2b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/1dfccfe2000dfb41ac92.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 16, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/258b3111ce5ef334fd48.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/258b3111ce5ef334fd48.json new file mode 100644 index 0000000000000000000000000000000000000000..988036127b47833e0581ed49e8aafb7d6f9d1ada --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/258b3111ce5ef334fd48.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 16, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/28eaa012d62c4703d0ec.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/28eaa012d62c4703d0ec.json new file mode 100644 index 0000000000000000000000000000000000000000..748250f7ed2ec66d737ecd1550077444ba2ef64d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/28eaa012d62c4703d0ec.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/29c603c74f22bee8fac1.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/29c603c74f22bee8fac1.json new file mode 100644 index 0000000000000000000000000000000000000000..4d77b4d9cbbafe1d489309f80609a8cc5f3e5bc6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/29c603c74f22bee8fac1.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2abf9a791ce0a27f930b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2abf9a791ce0a27f930b.json new file mode 100644 index 0000000000000000000000000000000000000000..77d22824626a431752233a917eae46630fa68c43 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2abf9a791ce0a27f930b.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2eb748099169e101b60f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2eb748099169e101b60f.json new file mode 100644 index 0000000000000000000000000000000000000000..0222364189539b926aa60fd2dae6e11a789a5804 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/2eb748099169e101b60f.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/34cb8707f20bc4e730b3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/34cb8707f20bc4e730b3.json new file mode 100644 index 0000000000000000000000000000000000000000..0365d4dbaca59d0a0dfeb8710375147c18270c50 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/34cb8707f20bc4e730b3.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 16, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/3fbfa290e3e6c135ba01.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/3fbfa290e3e6c135ba01.json new file mode 100644 index 0000000000000000000000000000000000000000..0684d9db345ee8b4b3920b450af70ce3d4039f9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/3fbfa290e3e6c135ba01.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 128, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 128, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4141b852f55b204a3cba.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4141b852f55b204a3cba.json new file mode 100644 index 0000000000000000000000000000000000000000..4bcec7f313e682211601663ff4663b936bd7bc42 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4141b852f55b204a3cba.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4291b2494794b836ca55.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4291b2494794b836ca55.json new file mode 100644 index 0000000000000000000000000000000000000000..2398252139e8f7a60417b250ba1419737600e76d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4291b2494794b836ca55.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/446652a0cef9be9bb531.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/446652a0cef9be9bb531.json new file mode 100644 index 0000000000000000000000000000000000000000..17f4a9bdd9772647fedb05c1824e0870316a48ba --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/446652a0cef9be9bb531.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/47bbcd5fd7a32d297869.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/47bbcd5fd7a32d297869.json new file mode 100644 index 0000000000000000000000000000000000000000..b2c2ac5bbe22fce1645c529c09de4fb1976d1141 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/47bbcd5fd7a32d297869.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 8, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/50fe43187bcf511c7dcc.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/50fe43187bcf511c7dcc.json new file mode 100644 index 0000000000000000000000000000000000000000..4c7f9fd24ca129c601866810f214e599294745e4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/50fe43187bcf511c7dcc.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 64, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/52dc45ae6c432aa63bd3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/52dc45ae6c432aa63bd3.json new file mode 100644 index 0000000000000000000000000000000000000000..0b8cd474e06d6233c455631e530c59dd6b90d2c6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/52dc45ae6c432aa63bd3.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 32, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55c3affb72b8680efc8e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55c3affb72b8680efc8e.json new file mode 100644 index 0000000000000000000000000000000000000000..568beacc4ecdd8b3b907a4aa00631b64936e3e3e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55c3affb72b8680efc8e.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 8, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55d7a6c45f285286169d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55d7a6c45f285286169d.json new file mode 100644 index 0000000000000000000000000000000000000000..1d31b94f5555b6a3482afd0fb2ef1983b8211d69 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/55d7a6c45f285286169d.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 8, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/59b8053b641a7fc434e2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/59b8053b641a7fc434e2.json new file mode 100644 index 0000000000000000000000000000000000000000..06da23fab24a8be819e5c5262c49991ee0fff1d1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/59b8053b641a7fc434e2.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 8, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5b51e13e67d6986e6c23.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5b51e13e67d6986e6c23.json new file mode 100644 index 0000000000000000000000000000000000000000..5d6239b323cac9b9ed2acd97be8d1530fd545374 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5b51e13e67d6986e6c23.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5c0403963e60c09412d1.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5c0403963e60c09412d1.json new file mode 100644 index 0000000000000000000000000000000000000000..88e086452bc28a3b27e2d58763c1e8bb33e3c891 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5c0403963e60c09412d1.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 128, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 128, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fc13e1481781b0be738.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fc13e1481781b0be738.json new file mode 100644 index 0000000000000000000000000000000000000000..5b7a436cbd8c30e6790d14a6975e6089e064835c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fc13e1481781b0be738.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 4, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fd59e319005f4c46b67.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fd59e319005f4c46b67.json new file mode 100644 index 0000000000000000000000000000000000000000..7a56a7adc3d2f37f3eb34a0127804fd55cb15ebe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/5fd59e319005f4c46b67.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 32, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6073cdc14493659d4fac.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6073cdc14493659d4fac.json new file mode 100644 index 0000000000000000000000000000000000000000..b7eef340d9839dd406f60e5e09074f34fd8a0f23 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6073cdc14493659d4fac.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 8, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/65deb864ed5a1df60c5d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/65deb864ed5a1df60c5d.json new file mode 100644 index 0000000000000000000000000000000000000000..abcd66d053bedca903ba3f9781fc4e33559e139d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/65deb864ed5a1df60c5d.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 4, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/686d10e37a0c31b7c3e6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/686d10e37a0c31b7c3e6.json new file mode 100644 index 0000000000000000000000000000000000000000..9dd781cc653c843ece86b76c06d2efe1027c958e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/686d10e37a0c31b7c3e6.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6c5a91212433422f7c23.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6c5a91212433422f7c23.json new file mode 100644 index 0000000000000000000000000000000000000000..f0de5c58cc1e654470dfa23b68e4c71194cac814 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6c5a91212433422f7c23.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6dec282ed0d4d524c63e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6dec282ed0d4d524c63e.json new file mode 100644 index 0000000000000000000000000000000000000000..02bf2422e97376c27aaabc1e96320ddfcc60dce2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6dec282ed0d4d524c63e.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 32, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6f0c9a6f105eb4431d09.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6f0c9a6f105eb4431d09.json new file mode 100644 index 0000000000000000000000000000000000000000..03e76dacffe7d45bcca7c6d5ba10d272c1333b6c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6f0c9a6f105eb4431d09.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 8, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/70bd16b077dea9efa87b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/70bd16b077dea9efa87b.json new file mode 100644 index 0000000000000000000000000000000000000000..f592c3520baf8acf7c3281c01ad53b7bb239f02b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/70bd16b077dea9efa87b.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/737b91a39739633287e0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/737b91a39739633287e0.json new file mode 100644 index 0000000000000000000000000000000000000000..f23317af00ea4a695da65ab007a966050c2634f0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/737b91a39739633287e0.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/74754ef6e14fbc5de558.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/74754ef6e14fbc5de558.json new file mode 100644 index 0000000000000000000000000000000000000000..6e1a57505d02a0d2a2de6d5ff3f1ea7a3b52ed85 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/74754ef6e14fbc5de558.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 64, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80ad31403a70da43747b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80ad31403a70da43747b.json new file mode 100644 index 0000000000000000000000000000000000000000..0348476e62387d650567affb4c6b63d11eceba29 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80ad31403a70da43747b.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/81cdfe238a28fa2dbb09.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/81cdfe238a28fa2dbb09.json new file mode 100644 index 0000000000000000000000000000000000000000..78aaf9560a2b54ea942230688788d017587b94bb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/81cdfe238a28fa2dbb09.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 1, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/85a0744fcfbb2154aed0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/85a0744fcfbb2154aed0.json new file mode 100644 index 0000000000000000000000000000000000000000..aaaaad71f3371e3ebcfa572a3d0f19e6ca1dff6c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/85a0744fcfbb2154aed0.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 8, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/864e5a09d4809bcb03b2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/864e5a09d4809bcb03b2.json new file mode 100644 index 0000000000000000000000000000000000000000..8854f84d3ef91e444cfa2967d6edc352da187af8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/864e5a09d4809bcb03b2.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8b3823a766868796c263.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8b3823a766868796c263.json new file mode 100644 index 0000000000000000000000000000000000000000..88320dd0757ab97fc24b8bb5429f5ec6ceeda3a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8b3823a766868796c263.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 64, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8c5d374a063a595d035d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8c5d374a063a595d035d.json new file mode 100644 index 0000000000000000000000000000000000000000..32094e3293a12398687689d8ec223d16c9c92450 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8c5d374a063a595d035d.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 16, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8cb6290db413c4bbd474.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8cb6290db413c4bbd474.json new file mode 100644 index 0000000000000000000000000000000000000000..e91b00f030f4749ee2499e432cd510c9958c23a6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8cb6290db413c4bbd474.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 64, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8d712d60cd7845642e5b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8d712d60cd7845642e5b.json new file mode 100644 index 0000000000000000000000000000000000000000..d3211b308a4a86664376bf92f6b040b4b99f612a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8d712d60cd7845642e5b.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8e2b4ab087df79ab0148.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8e2b4ab087df79ab0148.json new file mode 100644 index 0000000000000000000000000000000000000000..c11bf423ae75762dbf9a2fb010407901032174d8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/8e2b4ab087df79ab0148.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 128, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 128, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9059ace389376057f365.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9059ace389376057f365.json new file mode 100644 index 0000000000000000000000000000000000000000..6fdab38f9eb3953be83713ffb31e32258c209e88 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9059ace389376057f365.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 8, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/92ad09670bfbe78598aa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/92ad09670bfbe78598aa.json new file mode 100644 index 0000000000000000000000000000000000000000..fea86f0c5d116c2671d46678692da7c10f9a9afa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/92ad09670bfbe78598aa.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 8, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9b56e73c2b369f3766be.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9b56e73c2b369f3766be.json new file mode 100644 index 0000000000000000000000000000000000000000..4af191c0f0db05df05a4a5e9b5dd9edb6f1722fa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9b56e73c2b369f3766be.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 64, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a58bdf757c00fb4913de.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a58bdf757c00fb4913de.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa9e5dcecb56df62e4848a85dedc232a95efebc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a58bdf757c00fb4913de.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 64, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a5a74d202e1ba0c4b0fd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a5a74d202e1ba0c4b0fd.json new file mode 100644 index 0000000000000000000000000000000000000000..fdaddd078d67ba8f4daf9033dc568d534a26c8fe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a5a74d202e1ba0c4b0fd.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 32, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a6e16d14f73f55435991.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a6e16d14f73f55435991.json new file mode 100644 index 0000000000000000000000000000000000000000..89c8c6b6ccc34216a6f3138c38f2c40d26f5b02c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a6e16d14f73f55435991.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 128, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 128, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ac0082b3396a7f33f380.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ac0082b3396a7f33f380.json new file mode 100644 index 0000000000000000000000000000000000000000..2f03bbaea1e6d6c2654933c64e41c6cae9d3e4c8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ac0082b3396a7f33f380.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b30c2c558a761433508c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b30c2c558a761433508c.json new file mode 100644 index 0000000000000000000000000000000000000000..2cf2c89e79f79c5205c3790a21a7f7bc42568b86 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b30c2c558a761433508c.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b92f3bad41312155a2b8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b92f3bad41312155a2b8.json new file mode 100644 index 0000000000000000000000000000000000000000..a6d9c6e1f6d40fa7cc11a013f00f58833669c226 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/b92f3bad41312155a2b8.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 4, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/babd36831f874fd0cc0c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/babd36831f874fd0cc0c.json new file mode 100644 index 0000000000000000000000000000000000000000..e8619e682f7566290e8f403bb6d47684016f7c75 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/babd36831f874fd0cc0c.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 8, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bb17df19f09ba1a7cc7d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bb17df19f09ba1a7cc7d.json new file mode 100644 index 0000000000000000000000000000000000000000..81ed6ba6a8b2a65a70f690be78d6dcb90c12e07e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bb17df19f09ba1a7cc7d.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 128, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 128, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bf48e90b2f7eb17cff03.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bf48e90b2f7eb17cff03.json new file mode 100644 index 0000000000000000000000000000000000000000..4f11c663c02fe26853782b8c279b18617590159f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bf48e90b2f7eb17cff03.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/cb305fa12ee6269ee201.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/cb305fa12ee6269ee201.json new file mode 100644 index 0000000000000000000000000000000000000000..0d1561c9164ecb8ff81a2eba7318f82f97ca6f54 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/cb305fa12ee6269ee201.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ccc1ca9219f944cb2ddf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ccc1ca9219f944cb2ddf.json new file mode 100644 index 0000000000000000000000000000000000000000..20f92fc5ab329a72e411928aa7d3bff7716eac69 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/ccc1ca9219f944cb2ddf.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d1f2adb5334c2c85fccd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d1f2adb5334c2c85fccd.json new file mode 100644 index 0000000000000000000000000000000000000000..ad614dee47ee53b87f7c30578345cd2782a12b2d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d1f2adb5334c2c85fccd.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 16, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d29396862f2fe00fa073.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d29396862f2fe00fa073.json new file mode 100644 index 0000000000000000000000000000000000000000..b811c3ab4c35aaf9b612a7388465566d96ce9971 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d29396862f2fe00fa073.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/dfaccf7db532b86dd514.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/dfaccf7db532b86dd514.json new file mode 100644 index 0000000000000000000000000000000000000000..e2f9b07e03c84487c05831e4cf55b2f41af7c138 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/dfaccf7db532b86dd514.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 32, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e4e9990b6191c28ea1d3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e4e9990b6191c28ea1d3.json new file mode 100644 index 0000000000000000000000000000000000000000..9c81cb3fa8adb2d8eb8c9ebee1d3da467b9f5c28 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e4e9990b6191c28ea1d3.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e8e8742e33c6a426fa2b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e8e8742e33c6a426fa2b.json new file mode 100644 index 0000000000000000000000000000000000000000..6a3682d8c7bc8337f6b8a70da585e79b09c63f3e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e8e8742e33c6a426fa2b.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 1, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f0558983e2906eb0b5ab.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f0558983e2906eb0b5ab.json new file mode 100644 index 0000000000000000000000000000000000000000..22af7da06d2e5b52395579cffce8b7a79babc3f5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f0558983e2906eb0b5ab.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 64, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f31b6aa37f1188a14d53.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f31b6aa37f1188a14d53.json new file mode 100644 index 0000000000000000000000000000000000000000..7d0abd972d6f101ad1ed36a11083c53df3c32e58 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f31b6aa37f1188a14d53.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f7859387812f9a6c1357.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f7859387812f9a6c1357.json new file mode 100644 index 0000000000000000000000000000000000000000..f77a061127b7f546832c6465ab79e205fc0d6556 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f7859387812f9a6c1357.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 16, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f9cc3d2168d195633865.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f9cc3d2168d195633865.json new file mode 100644 index 0000000000000000000000000000000000000000..4c1d5acabc45c5e84abef1f5f3dcbb516b029624 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/f9cc3d2168d195633865.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fadf26f67ccc2ad82a8f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fadf26f67ccc2ad82a8f.json new file mode 100644 index 0000000000000000000000000000000000000000..be3f0dc7cf94b2f272cadbf6644dd5268f9609c7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fadf26f67ccc2ad82a8f.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fec7475005632b11ba52.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fec7475005632b11ba52.json new file mode 100644 index 0000000000000000000000000000000000000000..2e3e99d3c4e50e6c50fb51adaf42a1a53d512b40 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/fec7475005632b11ba52.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 8, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..efdea639f05520b34b97126a265ec2b76b69228b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b39f8c8ede97056dbf1cb4c157201cdd1b3dc9c4c4148c372238dac197dabe +size 781744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ef497462de8957836e6e19933382f4db863b4ed2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00246f51d08f7ed27839+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc0b3d02ba2f12392f8ff13a9bac92917dec779134da0ba5fe17cd77d002ecc4 +size 93686784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..51bfc0e7703ed7022db8bbc00d05ad6a0a61b013 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c343e08b825af3c705c988ed225312d9281ccf8d2a025b82a5feb5ee0eda733c +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..01f1f8414c871ea22a3659d12b1fc0e310472727 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_016ddc6b7463af23bca0+fb4cc044/model.log @@ -0,0 +1,10 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_016ddc6b7463af23bca0+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_016ddc6b7463af23bca0+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: Process Process-1:1: +Traceback (most recent call last): + File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "neuronxcc/driver/commands/CompileCommand.py", line 1328, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline.print_dots +BrokenPipeError: [Errno 32] Broken pipe +[NLA001] Unhandled exception with message: boost::filesystem::file_size: No such file or directory [system:2]: "/tmp/nxd_model/encoding/_tp0_bk0/neuronxcc-jbjstf3g/sgLnk/sg00/SP.bin" - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T13:03:00Z [Errno 32] Broken pipe diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..40abf8693db25a09e22f7018873d7349a416cff8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72a50b70b6f8c57d4cbf340d5ae6337b5bf294cd016fa63e5ba34d9d1e9987f +size 840640 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..023023f90c5498b40d3f43545a97851423e1af81 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_02007c49982251cdfd74+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba69cbd35e068d83852f182df9cc367124ef09435c1edcb9a3508935b72c9c4f +size 108749824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..82f267dd197dd8829a1ca54266b13b89d70e5817 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76bb3149199522b006a2beebd0588443b40ab808ca68b19e05f30c9b787952a +size 846751 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..c03d4a5d48ad728d5de9dd19ff1e8738433b5b40 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_03ed4b3598344bc45191+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_03ed4b3598344bc45191+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_03ed4b3598344bc45191+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T16:40:38Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 616929102397440 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-09 16:40:38.383906: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22036949508 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3f8253996f8fe1c07fdd1e75c2d308c9174c23b0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d97ecd244ab89b30b527a4a373918f8ca1c7c3757b597aac84e2308bb7644f35 +size 848822 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..499f3e68ee743e64cc763dc616b3b196b3d62d29 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_06a49641d496eff74287+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_06a49641d496eff74287+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_06a49641d496eff74287+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T17:24:22Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 2157447972126720 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-09 17:24:22.298526: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22668980354 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4494299d2b54049f9a288e2266b5f81dcc391067 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7067a69191f11142abcac660df873ee7d51f397a3013c7c85bca230366e62600 +size 626221 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..e507525c9fa654c1be3343f21545573036a36d7b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_088b4abdeb935312f5cc+fb4cc044/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_088b4abdeb935312f5cc+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_088b4abdeb935312f5cc+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d620c520f7b9ec0c3eecbe7ecb0f59be716b2664 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2019a93809269243a0e89e3f6a01964c47bad5086315970256491699d7a65484 +size 774422 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7c1aa303d8693d819239899b11a626b5d1c6ed79 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b5ceaefeeecbe146efd+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e53883517848067b13d15b5408afce160c7dcf97f25ee1b1f56cc7c7d1e9a469 +size 12166144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..73bb027d1691d4a2d006f4332aaa47585c9f1bdb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ba396bc76287eb0372b4ce315e793865b52d14ce7f0b873fbec1d83b857e3d +size 618697 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96fb618f0d5a0948a4b2a86ee620ab0d0f710897 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0bf0f7b24eb5eb88ff95+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27fcfd5f4aef962bcfe0dd93d5ae52cf21e45d445dd4b5eed000e826fb142b3d +size 44432384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07473b1d3a8b41834281798513a9b09c9cf0778f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e85caebad66f7af0d4349a72e3965c9f17932965a1183e79f3905ad48f15470 +size 619281 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2e8ece9c155008779d6ff3a47906eb4a65a108c8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0cbceb2b35b400eada58+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a99e45276abb679987d9c265231d6b0c7356bec94161f51b877c7966dcae270 +size 11531264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..78f4d522c3bfb2aa90a43821b5208e9f35662e17 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73fd03e5d2c760ca5c15120cbdf152034bf8fea70e26a7253b66b8a50ac90940 +size 775286 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1eeb8024b3395efaa32e9a7c52c9ab3c77a6c2db --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0d689ac638673eadd329+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3c6376fff4c7849cba41f46b62b742375f4ed372ad632e3cf40dcd54680672c +size 23727104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6b459e48d4bf274e02e2f3a902fad7d9ac8a1e74 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9288525354ae6d6d403c09392b2e11323616ca23fb6d8937eb8ba2dfbfc7fe7e +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..80bc8fdcee97f125609b8ce4b978e17920496e10 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0def30361c2a503f43a7+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0def30361c2a503f43a7+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0def30361c2a503f43a7+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T13:50:04Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1e37741bb83ff5b87863194b71f73d5b68249ff5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cb141c6e5f09b6aec1932aa2922b11ee9aeadb07dda35fe0cae14f6f4271a60 +size 857165 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..ff6c89cee58b289422dcb1edfc30bf961315962b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e96f0aeabc8c05c5cea+fb4cc044/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0e96f0aeabc8c05c5cea+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0e96f0aeabc8c05c5cea+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dfbfe48f894a969e491c8297fd246d4adefb2f83 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a93fa8af1e0c0824afc8d4d920d07f690271fc7dd4d4b002a8187e3ddbb40cfd +size 855517 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..f03006a0703f99e1b77c83e86d10761feac7e4eb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0eaaad5a2f067cafc190+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0eaaad5a2f067cafc190+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0eaaad5a2f067cafc190+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T20:55:56Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 794912547143680 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-09 20:55:56.656334: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22617826114 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a06f491d7d910fd2ec7cf902bc1d41cede682270 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2830b2b48980845ad6e95f9447c96fd5f455a85d6ce8e55ed97cee7711dcfb +size 635925 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..b323de209b422ac6f91b4b4ace0cb31d1f9e4a3c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0ed7f068c9d503e4a065+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0ed7f068c9d503e4a065+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_0ed7f068c9d503e4a065+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [LUR015] Compiler generated too many instructions (5808927). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T08:54:45Z Non-signal exit. Backend exited with code 1 and stderr: [LUR015] Compiler generated too many instructions (5808927). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9b339e45c23409dd194b43db157a102b4e351c47 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e8e336d39559f3b2d022f9ced532b9e615dd9654ce0d4abf9f540b3bf776d4 +size 619065 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1e1825cef8337e7216c3ddb5ba0d7e2b112b7ac1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0f17ee81390d681c2228+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:226ace9d6d26ec64613019920f62d09237981fdd897276090129eafcbd17f6d4 +size 12585984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..226529e631b1a8f48c605e375a8de0195b6208cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa4bd283a4889514f87eb412b57462a4c42bbb896ffed2a3f69aee3492ba1c1 +size 32543744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..55d17dff428f9891c86a423ab90013c16849a57d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4844263dbfe2d31a6cea613508f23c8c7499a3ac7dc6ac30adebae178db55caf +size 628841 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a4d4aff68a2d7329e86d74679ac4278bd2fd2f17 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_11c8f427ce1ae4c2006e+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de3569a6fe3533fd014a55388c2794d93db78f83f6a26cbf5563fc26c9660c7 +size 92263424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bbc23aaad77f9db89407242bca7e5507d711fa2c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81315b651b9b0de71acc007000feba008ac6612f7093c71346746a779c29c663 +size 848819 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..307a2e035da05fdfc36800a2619d9e077edf2544 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1242df1c9c717d5ab4d1+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1242df1c9c717d5ab4d1+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1242df1c9c717d5ab4d1+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:38:11Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 596485058068480 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 12:38:11.044086: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17496556036 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b88ec2cc6be2bf1f5c2ee8dc3ea4572786edd94e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e7d18d267b036e4a3b3d5ab592556bab5c6e5125aa84f96eba39c06eba0b1d +size 854933 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..efe10f86d50a55fa7ea1bbd0e0aa7d095473cda2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_132472d9d56d1f6e6341+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_132472d9d56d1f6e6341+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_132472d9d56d1f6e6341+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T16:39:58Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 4314895944253440 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-09 16:39:58.774411: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 45234335874 bytes (42 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ffd88c74c4d1ca21a5e9d6a3c219cb287bd87a7f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c4e4ff3e23ae7a276f13655e7d9c4248d2e58c608de8d97b10e9e313648e81 +size 866231 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..3b46426c896add27c0773df7acd0b15488eebf05 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_136817b0b36ea4e43764+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_136817b0b36ea4e43764+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_136817b0b36ea4e43764+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:09:18Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 24013333950627840 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 10:09:18.156141: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 13029571 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c12b6d443b6a410f669a8c4e45a89b2365b57b9b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:642febb32fdc03356ba70a288f89de64ebc8f0c0a70ac2dcc357538ed8e000b6 +size 848819 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..abfe6d557bbb7c56314fb00de848a9d403ea52a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18ed09d734be56c75027+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_18ed09d734be56c75027+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_18ed09d734be56c75027+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T18:29:12Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0ac971d5324bedd274c62297dc1457efb177db91 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49dae5bb2894dc0c7ea22a320b4b2de1774e3f5b385da2dfd4bee2d6f0ab9d0e +size 855517 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..fcd33376d2c690d8f614fd781a0d4de4b77a9e32 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1a3939beb88e18a29e1e+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1a3939beb88e18a29e1e+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T14:28:34Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 1231453023109120 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 14:28:34.090499: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 35534210050 bytes (33 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..182cdccfcc22d9762425ba6c20369b9ee9741301 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae3632a5072d91624b76d190b5435e08c58ad0d694bbb070627e66714c00d34 +size 854933 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..16154c5faeaadefed18d6cf62f4c0a339a10bdfb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1a41189de75fc0ed15bb+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1a41189de75fc0ed15bb+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:45:07Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 2385940232273920 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 11:45:07.082290: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 35616056834 bytes (33 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f22a51f351d0e1255dbadfcc83d077fb62aa5e8d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17699cf211a5420bbed1471be879e15183d7f4857245f9071b7690ea8165e192 +size 626221 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1adef620eea40d4f028faf45a526e536816cdae1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1ab71ba131418d320f9b+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e193be0c6a9aeff099b75d11e438d7658b61e7d9e2b3a0648dc16195f134173f +size 23491584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dde8ce32a1a0da51d58eba59b673ea1dafb478cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c082016fb78c0694a6b26131ba3769d0366bdd1be75fcf052dc0599f9da811 +size 847335 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..5237fb26d1bf135c966e249cde74fb270ee57951 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1afb27ccf10708e03947+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1afb27ccf10708e03947+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T14:28:53Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 538760697610240 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 14:28:53.907897: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17809081346 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad82de52fdcda7882a6b0071fba1246c8abcaafd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d531d1284d95f69f59e96f0f0b3f90c267d794dd73287f9c6a6f3e423b62c18f +size 840640 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..c0c458ce4d789162c5f6aaab8e4b09f1778c60ce --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b1cca7c4f2510fb0052+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1b1cca7c4f2510fb0052+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1b1cca7c4f2510fb0052+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:10:08Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 519519244124160 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 12:10:08.065672: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17496556036 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..29e25c6a7dc05cfb1d211cf3b58143f1fcc5049e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d65491ea0ce378fe4c09ec1896a2f246b6036feca7fcb1b76b78dfa36f7d79 +size 866087 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..d97be917389918f9fd9103a09c49c656dd6f8e8c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1bbbe37c501b4a4bef6a+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1bbbe37c501b4a4bef6a+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_1bbbe37c501b4a4bef6a+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:09:44Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 9543760929095680 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 10:09:43.960123: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 6514787 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..96bf2237230d77d6828fd0e3bcc3719d467a790e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181a67a9bb84283f0586e9c8de0b48cb6c2c82a60455503033a79b0095a44338 +size 781521 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..93da5e8bf1a4ba9fc3be7e8ec480ace4ab2f7f56 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1c279d75c64f4c7f28cb+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6856af256c6be61e29cb285efcb892115af757dafe16df88291750588fbc5f1 +size 15299584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..10aae86bd7104d74b6e509af35c4b28dd5b41749 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7bcf89b72fc6c860bab459c3165b11f81494f1b426956fe68a2a8e10dcd2cb5 +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..367c5f92cb61b3afa06cf9c27f6fbb0127282d26 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d3dabdabdfa7a5c0ca9+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdd66e9b9b8ba0dc67efedf910ed5909e1c7127c76698737f071e2384ff80d2b +size 44104704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8a38e6341b98b39c237ed804febcf294b060fd59 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f0d7dd30066f1f3fec6a55fc99687b04d38571ccd535769980445c0ab375a98 +size 618478 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2f30c957f919ea0ab1aed62c5193893c9de0b239 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1fb1d66f34fd07cc4310+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b12f140b606972dcc79469fd77c52e0e8f1d687258d3552cdc1dabbc8e0487 +size 34694144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..018dc5ac6723d9a590602e87bbaf3ae0afa1b59b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd6c4a2f2f52910be353642f920b75b5c70028dc2bd1bf23297e60672c73f53 +size 618697 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fa4c1a9db5bd2e3995fec17513f55327578cb2dc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_210b72f81fca8bd7952f+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef4d7f38346c34762dd26d3898ef07aa88158ec6296436248646c3066b25475a +size 16006144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5107da4fec9a51ce03b3859b6edee996c954993f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3a5ae6a70cd1a89311771086b96da6d3866308a17ce66831077dd386566f37 +size 847335 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..248527bc3bce829aaa09a0aa5552a5bdb1ca5eaa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2199f1e99b44680fb5e0+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2199f1e99b44680fb5e0+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2199f1e99b44680fb5e0+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T21:12:48Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..326e244109914c388b3585149811e879d9c7d9ed --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34bff56920eb421263097551215c34b7a72bef0fefec7c8d1dc5421a8e550976 +size 839424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..188dfffefb9a29243faa6a3e71dfb2a0e42b97b2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_22c326cc7ae69de46c3c+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d530e15cb554137bc29294d99e0ff0a4115c7b28036e97a1eab7f3c60794c0c +size 34110464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..061218797e3c618c71e4641ce6ca0b82ea9805a1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53e38e1c6ebd3342f8665943284d0d9eb20c5be013901ccf3d70cb4d7b858f49 +size 782117 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..c6ed05d6da89d1f9f2ad3ae3ab74474be9e40029 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_246c7eaf3ccadeb94412+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_246c7eaf3ccadeb94412+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_246c7eaf3ccadeb94412+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-10T00:18:11Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9b55b8fa90c1855e7eb820a015dc4b33f02d4413 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce7965a675031fb6ab8179087e299eef62fde401cc47e3276351e8e7ff0f2022 +size 848819 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..5db499a1ab63def59bcdd193332e828b4145e756 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_269a21b3e3cb06fc787d+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_269a21b3e3cb06fc787d+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:09:47Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 1192970116136960 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 12:09:47.166183: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17890928130 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bde75a0f775e9f7f040576c4ba25b4e491cd2f53 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b52f889cce4bde7411563e34e6e026579b32a5112a22176fb6f304becc453f16 +size 855517 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..24b79eb0eef7d4ed38857c087e6ba65a5daca335 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_28bdef19cd14b2d3853c+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_28bdef19cd14b2d3853c+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_28bdef19cd14b2d3853c+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T20:55:37Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 2205551605841920 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-09 20:55:37.125239: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 45183181634 bytes (42 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e5ba6488288a292cc7859460952e33210eac0060 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:489b8b60af9fe65fb3c4e3ae66fe146187b9eca7f7a7cf6fbc6374b96ea26a55 +size 774422 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..85984aed7c325cf25969ebe354b6c8b9eec2f259 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_29374f693146a8400712+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e22d220ea0bfb979f5d149941f5fb80edac8a835063e60c8bc20a8270c60fb0 +size 124980224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..71293912e7fdadd3b6894a6af75ed3de6f4d06bd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495ed5311d0baedcf0b69ddd8bcecab8ac1c1aa0efc36dddd64ef1a5b35ce65b +size 846090 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f06697ba09fac836605b27bd072522e60ea85966 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_294c539670d18fca3b7a+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0227ceadb981329c6b187032407cb9e302c52de8887a5f5af209e721153950b6 +size 31263744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2bad3e71b2a8e1c2697d7ec0d03d2168ea13f4a0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb2a0b5e818b8f41bb54320b5c41942dd412b2f1968e1c7f98e3627147f603b7 +size 782241 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..9f82e59b6cd490ece27f970444e8ac96dfdf6aea --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_298f485422f7fe2d7228+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_298f485422f7fe2d7228+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_298f485422f7fe2d7228+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-10T01:16:38Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8a0e70c22379dc01fe970dfa4da4d1d24e677ab7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e352f7aabbb8f32935bfb9b0e864fe5f72058b1e7242ad53bb67a7a5ef8609e9 +size 838840 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f1857ccfd9640fae3f5d531c6296d2a70bca685d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ad6bb08c01907e38254+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56cccf10d83902de77cc52025ccd12d7aea3e4f9d2faacf8addb7a8cd832591 +size 64523264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e0ecbba554f399ad213e46f5c71ee73f4212e509 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b04f4307ae0b96bbc8f7ad36629cffb7a1c8454c6c91722a79f9780cfd4659a +size 550215 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..98a0d5f18f389c845e459887946c3789eb5acb9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bbf2248c52ac915eb6f+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39f286e045b8038e03b10798014b7d88a336035972ee0710bd1e55661ee7d88 +size 14142464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..09860c8d31968598b97f3869bc1c74ad26ad3405 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb95029b0a9b055283732ed68dea01b1c51e3524237f9af21587bffe3c6b13b +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..95147992da3acfaefa708c2a778fcf876ee5b71f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2bd1d2a32a0abd395f17+fb4cc044/model.log @@ -0,0 +1,7 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2bd1d2a32a0abd395f17+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2bd1d2a32a0abd395f17+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5966364). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T10:30:30Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5966364). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..364b643e63e72bdacc073b9e797c6881144554fe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:860bd5e1809787b36e7156e45530230ed6812cbf6aacf85de3408dfb426f4958 +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c274324c50b2ae069cefd480c780baf4572b3ad6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2c8b27eef6e9052806fe+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc008453b49e30d34158d64a0b25f3e49b33fe8a3f52f4350c0849ee7b7929ea +size 91505664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..40807c7d272f1ee86970a9f2ae7b714754efdf08 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b9840e09349a912595d64582b179a2a28d421d19d291461b13165b193e3457 +size 846751 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..c0835d9c418641891491732a4b24f0b48c170bed --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2daa002f56d4d548461d+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2daa002f56d4d548461d+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_2daa002f56d4d548461d+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T17:04:31Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b4bb0b17f925a4978f4c3bce062aa9f0672d1a8e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e526d7a2beeefc85fd089bf82ca9ccea2c7df35583681daadb43b95a6af19253 +size 625637 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d0f3a0f89321108d91665306eb7d75f4838afd43 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2de7ac18660b7ecd4dcb+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d68832b4412d19978ea734fc6cd143e0c68c027ac1c71d7cb5fcb849c1b58216 +size 41585664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3ea9589ad128cfc96b35307f8fa95761a86ddde0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fffc72ca664ed1a983e0fece4078b624c0dd34a0438d38564c1173995a8e6d +size 635925 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..98ffbe0bdd492884533cfe99680fd293a3ec8d13 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3034f0c01273e0fddb60+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c32854ce61209ed5c1593fb20b3d40b33752b6928005513aaabcb077b0f432b +size 77589504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..98361beb56a7288fed4c68326d4bceffd7bd9d3d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d89a0080abaad7a6e413992a4eafb83aa9d7032080ed4552002aefea8bf698ce +size 550215 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e0d45345791cdf5ab90edf1b518e78a2b0030af0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308568a6877c754a5693+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be6e8db210047b0539458988d3f4809e28f37a254a6a723eb331e790c40cae0 +size 28806144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..89ad041225086ba9212de66803e1dcd070fa8a3b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15d5244b1e947fc69cf989d0915e468b9ff2cea20fcf861f0330adaa8329d05b +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..6cac80700d6363a905d5714ec60b7adf99856ee0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_308c6613b83f2baf8c2f+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_308c6613b83f2baf8c2f+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_308c6613b83f2baf8c2f+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T16:34:49Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 307863255777280 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 16:34:49.617242: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17473487364 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c63d3f44fba51103ddcdd5689491a113e5d7055e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2884ad2f04fff8d288bcd367cc5d47f83b5c30c757b4fbdddc35060e212dc62 +size 846090 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..89f2aa6e28c4b316d3f096cdd97c61f76ec97a08 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_329d10715e3353b4c19e+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b4a842db789861cc15ab17cad04f01f2dc567c54dbdafe5951b10ff7f0f6ae +size 33250304 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..794087bbc99e0d7a6843622ecf50765993469055 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65bf9bedb5587b4938397ba2e4797e65c68a5365329c8b664fa8cf4a9defaa8c +size 550218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b76f99b542bbbd3d1c38cd260af7c98024e4af5c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_34119e2d07794a4f0701+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6def46dee245c1b45e1c20aeebc7bac40046a0ec92f560c19c9bf3cd6a2f9a2b +size 19170304 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bfb6bc84c7352fc8b59f6e1b09e546bca5ce8640 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480b58bba49beb061729eb56262e0e2d59f10a3acf7d61e00bfbcb170e47fea8 +size 789219 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..490be3bdb2775fd8512e27da4e2ffd9f84c2ac4e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_348fad79553e7f761d69+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_348fad79553e7f761d69+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_348fad79553e7f761d69+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:02:35Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 4819 + convert 1055 21.89% ################################################################ + transpose 687 14.26% ######################################### + reshape 478 9.92% ############################ + multiply 363 7.53% ###################### + parameter 328 6.81% ################### + get-tuple-element 324 6.72% ################### + broadcast 262 5.44% ############### + slice 255 5.29% ############### + constant 223 4.63% ############# + call 217 4.50% ############# + dot 181 3.76% ########## + add 145 3.01% ######## + concatenate 74 1.54% #### + tuple 73 1.51% #### + negate 72 1.49% #### + all-reduce 72 1.49% #### + gather 3 0.06% + iota 3 0.06% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4149 + convert 911 21.96% ################################################################ + reshape 902 21.74% ############################################################### + parameter 328 7.91% ####################### + transpose 290 6.99% #################### + constant 258 6.22% ################## + slice 252 6.07% ################# + multiply 218 5.25% ############### + custom-call 217 5.23% ############### + dot 180 4.34% ############ + get-tuple-element 180 4.34% ############ + add 144 3.47% ########## + concatenate 74 1.78% ##### + negate 72 1.74% ##### + all-reduce 72 1.74% ##### + broadcast 40 0.96% ## + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 1150879436636160 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-10 00:02:35.534493: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 45157604514 bytes (42 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0ff9352fe22ac8d0efc3e3003feaa8c0470e4038 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a34c13a6fd956ff9b438786d7ec84b7076bb781517bba6af8d7958ed908951e +size 628841 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f62956894f6073c224e5c4ae977f5f4b721fc5db --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a3d29278a0f2a177084+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aff3e1b0b7657bfec4a74de0249462fb8ba8bae86da2f4e3935d172b367cf1c +size 38677504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d407dfb018a2f1848dde0e5829ab45d88ee3dd4d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a12e90d473fcfeaeaa4c34d7198ae43bf722312e4bb3dde02e0aa879c89afcc4 +size 857905 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..f6a11fa95ec02c6ff62018fd7ce398e5bfd53af5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3b599f77a7291a243623+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3b599f77a7291a243623+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3b599f77a7291a243623+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:24:46Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 2467716409589760 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 11:24:46.599385: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 7360512 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ebe3b4fd99f6ed82544ca2d1a7cf46fa791b9ea6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4362717b57f204f1d833d9665f1eb41bccc86063418c66c629eb4dee2508be27 +size 849403 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..34fa8c72d4ae1aeb30b15e849f40bbe63c260467 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3d00b1f2bfd7bc895d7e+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3d00b1f2bfd7bc895d7e+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3d00b1f2bfd7bc895d7e+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T15:03:59Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 615726511554560 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 15:03:59.568826: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17809081346 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c8d04b8eb4857594475ff41b488393c625cd8517 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5818d6ba0cb53b8bd802313088a4359a070f296b315ce24af0312fd0e99331 +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..f1d22053612e9061116303b8a7a4b0c4eeae6144 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3f0616426d0d7a52e6a1+fb4cc044/model.log @@ -0,0 +1,7 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3f0616426d0d7a52e6a1+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3f0616426d0d7a52e6a1+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5966108). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T10:24:52Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5966108). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e461348ebe1f9cad4593c9053362dfb643424b4f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725494d25e681e5537c5c33b6713f4e396837197d012d351d6076ded228d5d14 +size 840640 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..023312b8684f899064607d2fdb3bb17e1352444e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3fcf78f378a4928b9273+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3fcf78f378a4928b9273+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_3fcf78f378a4928b9273+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T17:47:52Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e1bca3002eedb0745604b5f524b37573b864e449 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fac2cf202675acb45af9e09523b8b7cf3387e8bc7ff29956e3645be5133d83e +size 775286 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..237b93798a39a5a098bf263ac861903680c761e0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_405bc7c8c866d7839f5d+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_405bc7c8c866d7839f5d+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_405bc7c8c866d7839f5d+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-10T00:46:13Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..35777c89e844aa81fcfa79dde81e5d5ad37deb31 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3c20a68200e43220c923a7f184018fac0c4fcf48d61525a6fe5901080aa067 +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8576820b1d85bacbb4c342a7532539824e8bb7ff --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4310522466763f19e4a9+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab1619ebbec108ad3879e793f3fda1c4f05340d654e5e595dbb5764f6990d1bc +size 46398464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..02555c82e64ba7a1b39b4f806a723567b02f144f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e718cd5a281d4cc41802edaca89d9a2b6bca029cb6b84c80247a55176880c78a +size 839424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..55deeaf21beac7564edc6d51f717d21cc369d932 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44a91259c06aa0b69cdb+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2a63ece8bcdb93abed21e1ca4f4273e529e175c53f8d9a3d6dc03fda6a00064 +size 67359744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bc8070359bf37e4ea9bf2c0dd56d6fc88606d2ab --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f9142edbcc3c0374ee6510efea946285c02c08252438d7b24101d740eaedb46 +size 851650 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..faada8f3cd351c38f28a4cef7f3fe742e1ff3f9f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_44d750cedc8d7c07669c+fb4cc044/model.log @@ -0,0 +1,7 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_44d750cedc8d7c07669c+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_44d750cedc8d7c07669c+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5864732). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T09:18:25Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5864732). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0a5d02ac67033b4c7f047dd84379a39dd1a64733 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d942d7a698ac8387a9bb4af5a695cd651da64c9625f7b6993c4617bb4d837529 +size 866231 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..2eeb094886b89935ecc35209fa0e51732d7adade --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_48649c9138561c013aec+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_48649c9138561c013aec+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_48649c9138561c013aec+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:24:01Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 17259583777013760 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 11:24:01.912134: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 8900803 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..00a72e8537433d0e6819198b6353f405cb948d37 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5435f6794cadc2a844b609ec2abb51e4d8c50a489f37b988ce9fbd3e02eb7e7f +size 857905 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..25e1b26b0c7c20a93d055a51c137d429f4ea3428 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4c1a88b20d6796854ba2+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4c1a88b20d6796854ba2+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4c1a88b20d6796854ba2+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:10:37Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 1924145348608000 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 10:10:37.591747: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 6670336 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9436a8caf72c15e28ecdc5f9bd5d6750e8fcda18 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541655e9e94abd28a90a4e6674d3fc5aa1a567dd4a8094053bf68c6be60f8d10 +size 848819 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..3705fdd849e93d049daf3136122f33c110ff9261 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4db147dd7715ebab63e2+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4db147dd7715ebab63e2+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4db147dd7715ebab63e2+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T18:00:02Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 1078723986063360 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-09 18:00:02.112089: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22036949508 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..89fe483c01209c697e885ae4fc181f401f1e424a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dce75c6fc6d9eb11f8096ee9b76fb86c439125090f203afa28513556b11d4fc +size 550218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..66ff4745a33ef955d0af967ed1a73c3593d67ca2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e90b6140446b35617df+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:105ac04fa768fb096110d3909dac92fa9caf3b667e45863f7298dde421f70554 +size 9780224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..507db62182b74d773308723a4c6047144033da10 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6d849b415baaa0f5574f817f0d841af49896eab421dc5d4c0edbd75fd147ce +size 619281 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9bc2cb554af3542ae4b12531d283384f54814a6c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e9f100f0dcd09bff402+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:039609f18069dfda753a3fe8e34d57b032000eeec0f3c985f457c965dc4c0224 +size 14654464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..58cfcc9368c5fafef3bf512f19a460c47297eae9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c5af3c2d175f36b77c6b6aa8453f3330f4380ae3ce24e8b9c811ffd8e7ef233 +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..4a3f9e80082f78057f4aa281716b0e136dc255c2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec6dc4abe7660ed4aaa+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4ec6dc4abe7660ed4aaa+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_4ec6dc4abe7660ed4aaa+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T19:28:54Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c6f4a5e767bf53ef9fc22e051961b3d274d302ff --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:353eadc5031d146d2d51e3fd720e3e4d06373f495460e6608efc6e984c5beb83 +size 855517 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..bf3f09e569e4fdf50590bf4e586767b59d33490c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_517d90e47cc2ae1f48fa+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_517d90e47cc2ae1f48fa+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_517d90e47cc2ae1f48fa+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T14:28:15Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 3078632557772800 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 14:28:15.296732: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 70984467458 bytes (66 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1c4dc67e9147305696c6229b65dcaa28d7bdba5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5bd0e482372e7ca89bcc7081783feebec4ee615497e63f51736751c35b54382 +size 625637 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6f449cee11b38d8092b9b1d136e5f33171c002f9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_52d550ce85cfe5f7cde5+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6110426a4d5ee7fe59d6e9e4ab8ecaab2b60b6f80f64c49a207382792b88b22c +size 32431104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..79477c148a8f08608f3b76d341ef0a3470331668 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6da54952452a08f9d453a9e9885c2aada25223f34579275c397fbf6039a7630 +size 550218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..14bc214cbe1e8da4577fada6bd1abc1e29049942 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_532d27607bd8cdc47997+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf016863d0a103e2b31092ade03cd5b1c7659b4ed8f4291e5aff65bbf01d4c6 +size 4783104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f94c83c93431eaffeab0760cfc9638fefe39ba4f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9156b961eb2357c5b41ca18949c73bac1ea3102926640f9bbbda5686cb04b4e +size 775142 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b1be105d81d9187a4d168fdee8cf51c73f07e766 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_535b06a6f27a7161adcf+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f231ea1c66f2f7906a7f63db40582ccee1600652b3ddf5ec526382e2aa86d910 +size 21105664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..09b9a7637acd6742a33347896941232090cbfcf4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f68bb11c0776d0b2f7cc25edb0df176b18bbe9871ed3bf0bcbe755cef147a87 +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..3ce65ffa5f29b9a99ebd991066b6f0d425b01faf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5448d3a55a9a90c15ed3+fb4cc044/model.log @@ -0,0 +1,7 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_5448d3a55a9a90c15ed3+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_5448d3a55a9a90c15ed3+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5865500). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T12:01:22Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5865500). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..57acaadb781deaf6b04bac7d3f18657f4f6ba8a6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ab1648fdab97bf68365e1aa5973e5b6031c2705415d4fd1b323d85ddf5087ee +size 859976 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..8e5fd2580dea2eebe0203750cf9c4bc30c26db2e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_54baea067a19d8c74866+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_54baea067a19d8c74866+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:15:35Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 12006666975313920 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 10:15:35.183130: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 6514787 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dbee33b4dc9b2955634e49ccdf3378526e84f2e0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45db097b0bad3610b615551bfc652b3e6e13944b68b642c64e9d2945f3020a70 +size 849403 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..2ad0e680d2df450316658befa8747c5ad21c4b98 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54c11cff81559082406a+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_54c11cff81559082406a+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_54c11cff81559082406a+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T21:27:06Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 397456273571840 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-09 21:27:06.056648: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22024989188 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fe33bfbd7eacf1062cedcd0611821c7d9e0832a9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb04bbd02289dcb571e421b08bf8284ad7eec46ef6aeb00f5cdf020fb291bd2a +size 556222 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9743f3a7d93468a4827659bfcccdf368c64bc024 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5673c277e4f5fe954d60+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da42505cf81719e1c931bce3f1de75749aa5069b2c932df7ab07b0ff58b5d8bb +size 19006464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1cf95613beb7a6a9257f56e136ea30c8caf4fecc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29038bb3e002fb2bbb8fcbcc6fa161385196f79f7aa57618d98e11309bc5112 +size 619065 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1736483aa52ca99d68ff2a7b5b3237fcd45b8990 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_576ba585336d7db50d6d+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88cc18a0a66664c97dab5ca3b1900f67ee0891b247f02afe16ed456d569eba06 +size 4148224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bf4cc07e93ec71909fbcd7b214e7b8bfac67bfcb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7568e1d92b4c6c400778ea305b47da222ba2c5ce407bc6099eab3b8e1c7218d +size 619281 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bee4d0b7a2b4f666c38e0f241fa32aae22a0668a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57d25abe3c5f66cf1156+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c24d69006a85c55d02fb750be831a0fd51fb52e3c45cac56571d18a85fbea20 +size 30792704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad1a85dedd2f245a3c4bfc76831f2f19323b0a56 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e302451c798a189295934c3769d0d17d4f1faa17589ae2697d667d59c8696c98 +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..184fa3a8621bb3672ba94bf3414037713fd2eabe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57f9d95694e9df2ed2b0+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_57f9d95694e9df2ed2b0+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_57f9d95694e9df2ed2b0+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (19.257GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-05T19:04:25Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (19.257GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..75436785803ef6652e209ab17e2b2db4e5b63b75 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcbaeaa0a445532ae79c0ee453aa16b6a8aa7d94acfe2921988957a56298a4c4 +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..64b6957adab48461a05c884aed995e7593037715 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_593ee0ced8d35810d75f+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e10fb635149257c9b4be441df10e0d1ec1882e75fb725974d155ec4fb69777 +size 65281024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9382e5f9ef4eac58b5082a9e63144451d90dcbf2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9afd9b922494f37910fd123b49a349abe77e409cd972f1235a17fd3f9528a78 +size 857165 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..c609c8be6e627e4f583fe3572f1921ec17f99a7c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.log @@ -0,0 +1,4 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_5c4a6aceb0aa544053e8+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_5c4a6aceb0aa544053e8+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002] Compilation failed for the following modules: + Module sg00: [NLA001] Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg01: [NLA001] Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T11:31:50Z [Errno 2] No such file or directory: '/tmp/nxd_model/encoding/_tp0_bk0/neuronxcc-cpisyt01' diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc88224dacf536764c49478aa04aa0111c56eaf2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d87768a7dae7c4eb4a2cc7d009ba26d2f234060dbbcf093f60036d2d6468905 +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..742346ea342166f16e48edd2c00b49fcffa1976f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5d24f683ef1fde5a5652+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8aae3cfdc0d72affb6e2dabbe28ce1e6b11e55b8bdb0b8c00f9202994daa01b +size 156437504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..62cb715860ab6518a8e576227a79d7d330783946 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee6ea37b5482e7729e90f838316517feb2d8aa8bf442cc181bff1b91390f7899 +size 859829 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..32c1fe24d1f58b8ed71e4b3994550182d145dab2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6091f5c081b62d1bfd9f+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6091f5c081b62d1bfd9f+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6091f5c081b62d1bfd9f+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:16:03Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 4771880464547840 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 10:16:02.994627: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 18388300290 bytes (17 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8095c777688ffa9550a5ee224949b912426e306c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f475f3ab2db55cd56a5e08cb304ec1066cfd61338a8d8849e46967447d76a5ef +size 847335 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..4eef072f85c929efd2cf79054960b6ca178e12b0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61e5214b0dfedd5841f6+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_61e5214b0dfedd5841f6+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_61e5214b0dfedd5841f6+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T20:56:15Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 320490459627520 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-09 20:56:15.623840: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22024989188 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b19c143b5895f3737196992fe6f8316a705e2f80 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b4f2d1b4dd63ec116456bcd5fabd1a789ae137e43116972174a6fa8548c2e4 +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..efd43803d5aa956efe152626bad1f2c7deac60b7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_621041d0302133e5da54+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_621041d0302133e5da54+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_621041d0302133e5da54+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T16:56:57Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 384829069721600 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 16:56:57.365010: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17473487364 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..97801b2644dcbbcd604cc93393d04d68e4f3b12b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:455c96b407108dd5ed08b5cb8cf14d0837c9bbcc9460a0e90ab3ef9498c957f2 +size 625637 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4384586ff66e3ed5e22544d4791d7474a62d8e86 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_62a5b0034aeb1bd8056d+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:363463bce98500e44b0987b9a4565b92fd1ad59c6499c707cc89483a6e2e11e0 +size 71896064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eb67db5b3314f4eeaa919eac004969a9f2e1b10e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5a83949de28fde124a6106707f8c43c8434bb6c9497076254c57018052a7a49 +size 635925 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..14c27b8697cc0869590f43f207db57c206122551 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_63b228a1aedd2e24d261+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_63b228a1aedd2e24d261+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (19.482GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-05T20:24:24Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (19.482GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7d7304fc7ece3982185cb2f5b9ebe7edc5e39bf3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:402206b7bbe5953ab47efcc4da5d29c60dace2ad95797db062b2260f54211db7 +size 838840 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..507c8803dbdd5551cdc109bf271c7a4fe5750521 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b0c122726250bd8b02+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:156f94119153b501d98dfc07fb221b3eef7e5d19def64a06b77b99a2fdd8f1b5 +size 32021504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ea4c584650546f5b8e61408fc4b65866b1d69ef7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e8bf77004216ebf1e6491d78b157bd39a6fbd792b3f4034b398eef471187cd +size 782117 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..4011e1674bb8dfca3a5198c047e2d3489c534a9b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_65b8f98e46ae7a9ef982+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_65b8f98e46ae7a9ef982+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_65b8f98e46ae7a9ef982+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:03:13Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 4819 + convert 1055 21.89% ################################################################ + transpose 687 14.26% ######################################### + reshape 478 9.92% ############################ + multiply 363 7.53% ###################### + parameter 328 6.81% ################### + get-tuple-element 324 6.72% ################### + broadcast 262 5.44% ############### + slice 255 5.29% ############### + constant 223 4.63% ############# + call 217 4.50% ############# + dot 181 3.76% ########## + add 145 3.01% ######## + concatenate 74 1.54% #### + tuple 73 1.51% #### + negate 72 1.49% #### + all-reduce 72 1.49% #### + gather 3 0.06% + iota 3 0.06% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4149 + convert 911 21.96% ################################################################ + reshape 902 21.74% ############################################################### + parameter 328 7.91% ####################### + transpose 290 6.99% #################### + constant 258 6.22% ################## + slice 252 6.07% ################# + multiply 218 5.25% ############### + custom-call 217 5.23% ############### + dot 180 4.34% ############ + get-tuple-element 180 4.34% ############ + add 144 3.47% ########## + concatenate 74 1.78% ##### + negate 72 1.74% ##### + all-reduce 72 1.74% ##### + broadcast 40 0.96% ## + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 172271138242560 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-10 00:03:13.732131: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22019009028 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..60cc7de8b34165a659b841089b1be1b59c62e1f3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e69e62f72d5afd9d36424565053111a2fbe24677c272ec7d9e75a8b913e23e +size 618697 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cd1840e2a73da7e299b4c359840479d2f9e5b92d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_66af75d88691306d5c36+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:744ab895bf11348a08e9d410d60645d20e617c71b18f0332ef9c764dff4c728f +size 22365184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..82b6f7ed308e2a0e4ab43cd934e6fde341a3885d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:891f2e93aba0e86510e2c390a73ee3e0b76a89330e06259a8f0d581bfdc55c0f +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0a23cd04e4180bd0a1397c1ae84be192a5ae58ef --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_691f40d8d6238f18e019+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2102ba0c5abd896ea993a7f4842986412d05830faf319e8192b456d2ad24cf3f +size 124089344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..14c0bbf05c6792c30447c12dcf8457729430679c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e94729df7b3fe6f3eb3e5d3cc0de0e463abee799fe527e4b2541dba45ac577 +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..0a363429b89f7eff7d0d45b36c300712da4c550f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6ad2615b3c956cc7d737+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6ad2615b3c956cc7d737+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6ad2615b3c956cc7d737+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T22:21:33Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..324a4cce76418529ebb16435c22b675ab33f0d05 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46d08956cdfae8727b5a4e8c96c76beb5087a56b570e3b7f4ff74e25552d64c +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..fdcf720b302fdea6741cc44de95a810af11c2715 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6b760a3a59eae94e3abd+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6b760a3a59eae94e3abd+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_6b760a3a59eae94e3abd+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T10:38:46Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 4314895944253440 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-09 10:38:46.353799: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 7360512 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..338b05898b1c64778104bf1c477f47d5946bb5be --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae6eb76e276f963ca3e38872ade2738550a3db59ccda86873333ad34e9828af +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..19ffe74a48d05bd49cc04f530c976bfc6509fb35 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f655c6dbddaa46f86cb+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8becfd4f77abceb1cf201019fe0067461a001f83ab161fc060294b1209d0467d +size 247000064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..806bad5f4415ecdb9e5c6f18fc8bac2c7a6d8cb3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b86138b4b2d7f4e55d2025a4ffd92d86013f680be49ba958ef9b4c7df7ea9d4a +size 782241 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..2164a529709974b34cfc97d62d144cd3419c2cf8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_707302ec7fac3bfcfcf5+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_707302ec7fac3bfcfcf5+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_707302ec7fac3bfcfcf5+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:58:59Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 4819 + convert 1055 21.89% ################################################################ + transpose 687 14.26% ######################################### + reshape 478 9.92% ############################ + multiply 363 7.53% ###################### + parameter 328 6.81% ################### + get-tuple-element 324 6.72% ################### + broadcast 262 5.44% ############### + slice 255 5.29% ############### + constant 223 4.63% ############# + call 217 4.50% ############# + dot 181 3.76% ########## + add 145 3.01% ######## + concatenate 74 1.54% #### + tuple 73 1.51% #### + negate 72 1.49% #### + all-reduce 72 1.49% #### + gather 3 0.06% + iota 3 0.06% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4149 + convert 911 21.96% ################################################################ + reshape 902 21.74% ############################################################### + parameter 328 7.91% ####################### + transpose 290 6.99% #################### + constant 258 6.22% ################## + slice 252 6.07% ################# + multiply 218 5.25% ############### + custom-call 217 5.23% ############### + dot 180 4.34% ############ + get-tuple-element 180 4.34% ############ + add 144 3.47% ########## + concatenate 74 1.78% ##### + negate 72 1.74% ##### + all-reduce 72 1.74% ##### + broadcast 40 0.96% ## + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 287719859159040 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-10 00:58:59.107354: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22019009028 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..af67a9657d78343687559592aaf3f73d5ce41e7b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02c3310923e8955055bdc926d0f7a97ae08cb9109d243b06a3c5390962b87f14 +size 550218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2d6b477ca2ecfc28bfd7ef73ada040eae55dd948 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_72772fa709f43f40424c+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:669cef4f247802cb9aaa1c10e203e4ca79403bf47d4d0c0ba922413b9ba78359 +size 5039104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..550eec71897451df89c7f12013e50dfc76f163e7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c8636bfdd8f45ad19de47a746dceced3e2a14101e2253a3b4b9db7d62edc180 +size 774785 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ee182c404cabe9b55eb6918c4c6b54e4b522af35 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_745d089e75e795ad6a8b+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70858608aaec3fae6ef35cf47b9ba0393e30ee52456bafe6b032cefd7d394291 +size 37725184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..713a9aa1b62b4676ca8e5dca6c39b894a9f49f1b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cc4d2b2ad915e51ac5b7a2e50496585019ee06395d518adc4eb909c014f9d94 +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..231dd8d3ecc65e5bfa7c5c97da442e2455895fbe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_74c7a8ec0cb73b46b0d5+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_74c7a8ec0cb73b46b0d5+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_74c7a8ec0cb73b46b0d5+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (19.443GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-05T18:03:34Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (19.443GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c6b09f940f7c8f305995a07a17af1a757e7e1821 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51bbf4db8bea8bad83769e617efec30d70ca53fe58123c313d563561b3fa1032 +size 550227 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..74dd1d8d5ccac7ce47144863e142f2a07a2b2904 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_75b1bf9bd90504d384d1+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36ebfe03e25c557478ff71757909e7d4f35c68526af628fc3245b72fec9de2f +size 6278144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ea79d6adf14d3bf7dd44b346bdfa7111aa7c5f73 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1adeb107c7b1d52a0e80c21c775ec266dae29c11ae7a844923b275ba9ee20a68 +size 774422 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f9aab0159f8807371679bd939bfebefacd96a691 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76054337ad9a319bf520+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd0bf3a62c895f30bbbd382a11ca9f85806e269de3d8a4a5bfa190ddaf1dcf5 +size 63611904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..06fb6b2051e37c0de83d670726219fd6c2af4519 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89691a25cadf456648cf7022db1fd48f3603acd67d62a7fda0002b609613c057 +size 619062 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..58573b5e88f2ccd7943e2ebff3e455ea1dc4e611 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78f3eeb078cd51748fc6+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac372f8641c416a8195b7d449f6c22e5375dacf2ec8b1aae1cb94794054e55b6 +size 45374464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c222ed3b1e52f3649b85dc534f1e5a996c72a39c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b21e615d09cafd59a930c643e6abb941363dde476df24ddecc2d12b67d0d408 +size 618697 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7fc1a0ef0f3d9d278b835a4ec888c6b1e4cf35f5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_790812dbd057158980ea+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb18cd4c3b3c36914e28de792426aa9e0a5b8e0a27de20d53eb7b17ce749c6d +size 18064384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c0e5ddfbd4a9ba2a72526d2cac95b82f74c6b09a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc10a2c3da10f255700159a331969f20665b37395197756053b88fed2085c48c +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..6a7ed5f94c5f8048102039558ac1a415340ae872 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_798b5985b605bd7d793e+fb4cc044/model.log @@ -0,0 +1,7 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_798b5985b605bd7d793e+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_798b5985b605bd7d793e+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5864988). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T10:41:46Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7873048). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5864988). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a05eb723ebdda4fd118c29007074eb3a1bf98f63 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c140ea9da4f082f3adb76fb4afce224161acbeab7486844ce9d8c483a379f57 +size 839424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..44853e71b394e286d9822b4b0b164de928b05432 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7b11eb682e93341f0887+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5378d08113d4d925b0a142822913b1ca312110d9fdf8aa93446d3a1fd71766d9 +size 36178944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d720f6dae8800a073caf9fe02b2d57624330d5a5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373f0d1f96164682ea2279c8ee2d6f582d9dce0a79dfea17bcfa8ca48d857038 +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..e0b4890b93fb5b1d911f6f2c351918272708f279 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7eacf49e1370076322c3+fb4cc044/model.log @@ -0,0 +1,7 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_7eacf49e1370076322c3+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_7eacf49e1370076322c3+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5966876). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T10:35:18Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5966876). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cb068f0ffdf608210208ee83970889db5f142a75 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155c6e081c7eb1c61ece71fa77412b3525882a84d8a82f0347908930458bf305 +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..a64e5290c9e0f9c81d0a8adc321322a4c186c7c0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_81503bf195cd5d2e22b3+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_81503bf195cd5d2e22b3+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T16:34:29Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 769658139443200 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 16:34:29.667895: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17809081346 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a3bb1628c4fc8e3964633add655e0f1bcfac77a1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:027ee1036810d7b2f04c2b4b972174d500ecc1ce1ce172819d5cf62ca0cd8d46 +size 774785 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2d7cc448e59fcf0fd2ef75d71f5450337cb42fe4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_815bc4fb35f01af9f1ae+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b2ba7073228626762f8e7f97b1deabdee1b34778fc2b58390163bf587d712c0 +size 17183744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..61b0189e1f86f52cf30512808521981d014f6c86 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:260dacf0e013367c4a6fa52f5804c41b3cdac5082269571202c1cbf503280f61 +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..9ddf6f93227d64e2a37fed267802b3c6e484998b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8175499718da7b5dbf78+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8175499718da7b5dbf78+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8175499718da7b5dbf78+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:59:23Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 750416685957120 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 12:59:23.391612: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17496556036 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c420d4511c2631712f714541707abccd78a10393 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebded06edfe08b5576eee1338c8a522f0b58b37913aa1e3fc36963ad88891e65 +size 782241 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..b6d50597ef2e8dc2c89a2f095f6b94807e1caf98 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81d78a45011a5ddd4264+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_81d78a45011a5ddd4264+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_81d78a45011a5ddd4264+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-10T01:50:32Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2d7b18e9929b91487b917b95f3adc6624d76fa8b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dedbdd882e559cf411b70576464063a641f3b0b262b1ef773f800e71a71363a7 +size 781521 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6fb4bf3830b1fa393b977a704d35a2c3f61cb487 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81f3ec270dd5aa6f295b+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac58b230d95404f150e7ffc8992f25b39df52c8fc136d19cd872e6b6917eae49 +size 29901824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bab374b79cfd49161e3853db902cab017bef0f0a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d101273f956ab4157415a6723274f5750c9cc411bbd7fa0b9aa8643e4d978fca +size 857905 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..0f8f76861e2912d470fe539d98c6a60ba2a1e353 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_834268a784010ae47258+fb4cc044/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_834268a784010ae47258+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_834268a784010ae47258+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0dd27fae856c8e8aa75e07754fde2b02fc2a08c8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0944eaa2aebb4cc1a1e1fe99226aa36df9b4bc72bb43c1112995b5ed337729c4 +size 550218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..06c5ecff3426ee27ee33a7ea557c6f3e572b4455 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86b0f00722f218bdd733+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5351b5d8f7ad3451fe85647b5debbdc4086c73bf2c7203b124f96197d72ed21a +size 9268224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8ed66b3541df5c426f78ab3877b1fe8bf3d69fe2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d6bfb66b8001bc3bc3e50b6b298e91380d9d5a81a693c2536a90ce09a364994 +size 628841 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ece52967d6389d8eb6e17b0b72c18953170714ec --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_86be7015514811130001+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:906ccc485187a84476b8b7d9ff353fa27689c9f5db88efb966af56403ed05f78 +size 46377984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..82c2a61b1b5e5caaabca5f77cc78cbef3da4d8b9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e45c384167132d812fe4c7c2ee3cae54466a96795a354027643ec4f3d9650f8 +size 840640 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..77ab4dbf5bda50ee1012ca11ed509cb792281b81 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_87ac40e3234811a067b9+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_87ac40e3234811a067b9+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T12:23:06Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e7c04b0654c790681cf77abb8728ee004b3e3ccd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2801c76354772f7b850f7fe0ba970683b2ccc6e138ccefbca9689674d6a505b +size 618697 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7892eca96983b66dd79d624741e63198df3a276f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_882cde96bdd79751820a+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa49e00dcf857c33405eb55bd926e393b11f4428d8580432213c66415a2067a6 +size 35666944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9bad00eb2c6908fedad395cc9d40fa3a3466b43a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0111c51b3983101ed318f88e489be8c968dafd5d246ca313605397ba363ecd6f +size 775286 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8445dfc255f59b5a5bebc231005097f8b5922944 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_889e53ae9bd551bec0ad+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f82dc164ba206a694c8580aaf2a716b28a3e6ec764e24cadafb9df7df37b08e +size 45937664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3d3a63bdfa7b4b29ed4cf063b2e73b350ab0ab80 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4978f842ca956598128454baeb9e9201a0c6406312261a042ef367d228ffd6 +size 841224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e33208028a33036146746284c5d8a9a88add5a2b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ac6eb993143ee441f61+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a71b6bf8ccf928c1fe739c2e8fb95b83f75f2fad9f7fbc98ec83b52fb814f269 +size 57385984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..99bb690add4bd341cbad62899cd523be5c2fdfea --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40700a68987dc3b2676779f190a5b41c8318e733bb9a2d9b48b798ecc68b735f +size 841224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..375f1f29c27441eb31d39f1d361fea393cd0b81a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b45b48562f62a010f77+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8b45b48562f62a010f77+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8b45b48562f62a010f77+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T21:43:47Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d1b4ff82e6045c2a7036640cf8e75a3b5ca57c21 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a326ecb8f0094afb779864216c4e04d23c873cda9ca95f17427f47767bd42d8a +size 838840 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..725055d5337d4419e636a3dec8bc3609c4c2a554 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b9955761e61e2616e95+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb179225a764454cd65b5cf5bc49a43a8b83441dc6dd5433811dc2af489452cd +size 54866944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0c779920f69b38031e6a42125332787ede9382d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf172816c92a0b2bf781d1385078247fcfb5011c30a8b1bf3028261c5d1739e8 +size 839280 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..46f521af4ed54a72492ec156c16edd150cd113ce --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8c2c4d30165b7df0ff60+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca314ce2c63f0d11cfc1440335e62331a26ef9e57a09d84ca0f8c22dcfec6f55 +size 14736384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..95eb96496c81c57baf9b71e70bfe5001ba349e44 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f58f33803cc8da67492d0caacd58ab3867d6a509df21377c66e7d38589199523 +size 839424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..ed8bbc3920ae59526a67ca0ea6916f0bb58c1450 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ccf7d7e0da627625b86+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8ccf7d7e0da627625b86+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8ccf7d7e0da627625b86+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (18.734GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T16:45:52Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (18.734GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..52330a0442e132bb0ac847101d36f9af526856a8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:193b34e4fbfb73b0e96c027774b7518fc7978c624a2c8d92e76a7b6b110bec34 +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..89c2e3a97d33df2d0b30b53503c75fc080ab3420 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8d8d38359e9e9f052a77+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8d8d38359e9e9f052a77+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:22:20Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 2385940232273920 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 10:22:20.765302: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 6670336 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1d9edb5e1cf2bbbdcfdc8d092e3d62611b42886f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:357a1cde5273dfb5488f1d3ca1a423238a582e7b466abd04ffc56f64971b8575 +size 838840 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..12466c95f51d7c1823cec80658712a654597ebe6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8dd69135fcd689bad9f9+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cb235029c106f08e0e5127c3df479c5256ed050bfdb3f1b7d61986272d5871d +size 27689984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..556363d99666586a4066764be08fbd3afca04404 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d88260f5bf6955ae3dddb08b19e8228d17672b9d527257136a8d37883bb5e3 +size 781888 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..295a5c9458fa88062101ddc62407bc19c85f3a84 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_906394eb7f18a4a73151+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aef5d7c5a48493b2ebb8448239b22e782b8054012f04edd18f18e3e2e264243 +size 264766464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5a09b4f851d44e4000cfe47b20781c0c740057d5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6535825e54efcceb9281f6b25d0cde19b16ba7a47b5ada7ce2c5c577bbd764d +size 556222 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0ee7d5f233204470122644eb025a8e3fa208e7bf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_90b55091a26739499584+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d41e80269c45f29160b6ccdda317a4207fdb0b73d30cdad804317c6be5d7cbc +size 36680704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..39db8a319cf188dd14a13d412e2ed492abb990a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:547f189b3f1fccbbde96bd0f508680b25ab65a63cdb864a1679897737dea3891 +size 782244 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..003ac594240df7c936e42c756017b3f5ebb04792 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_917f77d4c0e9130220f1+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_917f77d4c0e9130220f1+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_917f77d4c0e9130220f1+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:29:22Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 4819 + convert 1055 21.89% ################################################################ + transpose 687 14.26% ######################################### + reshape 478 9.92% ############################ + multiply 363 7.53% ###################### + parameter 328 6.81% ################### + get-tuple-element 324 6.72% ################### + broadcast 262 5.44% ############### + slice 255 5.29% ############### + constant 223 4.63% ############# + call 217 4.50% ############# + dot 181 3.76% ########## + add 145 3.01% ######## + concatenate 74 1.54% #### + tuple 73 1.51% #### + negate 72 1.49% #### + all-reduce 72 1.49% #### + gather 3 0.06% + iota 3 0.06% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4149 + convert 911 21.96% ################################################################ + reshape 902 21.74% ############################################################### + parameter 328 7.91% ####################### + transpose 290 6.99% #################### + constant 258 6.22% ################## + slice 252 6.07% ################# + multiply 218 5.25% ############### + custom-call 217 5.23% ############### + dot 180 4.34% ############ + get-tuple-element 180 4.34% ############ + add 144 3.47% ########## + concatenate 74 1.78% ##### + negate 72 1.74% ##### + all-reduce 72 1.74% ##### + broadcast 40 0.96% ## + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 575439718318080 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-10 00:29:22.587343: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22592248994 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6c15ea5ef4779eab2b94774ecb68b76cc13c4a75 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db84619e6d90debc6a0fae8c22d8d22211c1e2ef88e25bc05da015728baabbf1 +size 849406 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..3c58df8d55dc03005658fbaf4f24806ab196407d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93763701819fe4666319+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_93763701819fe4666319+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_93763701819fe4666319+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T21:26:47Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 1102775802920960 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-09 21:26:47.480495: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22617826114 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2fa6a966f699ccb6b51e113b5d581ee77974f420 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad493bc86dc595471874d38e94a355311ddae6339ff7c07e5b42ae5d0b80a8aa +size 851650 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..dc0583e25f98d521ab6768aa007ad128a0c26bd4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93b8b0c7e8019251e6b6+fb4cc044/model.log @@ -0,0 +1,7 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_93b8b0c7e8019251e6b6+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_93b8b0c7e8019251e6b6+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5965976). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T10:19:17Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (7459608). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5965976). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..01c6f9098575e4dfa5a65fa6a58790408149a5db --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:446dbce3ac79d663547505542c3aecd07cb61a1d9089ad16f9c5964aff3e5194 +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..ae66e07370cbb62f53b859a165e96b03d916a4be --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_94cb050e98e4f6114fd5+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_94cb050e98e4f6114fd5+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_94cb050e98e4f6114fd5+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T23:03:19Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ea8aea78abbaed9ba2a89cd64b9bbff87573151d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:109b9d025dd23883b21235785d35b9f954caa4b20cde9ec870bee811a4611765 +size 857165 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6fa80bf310549199f070881c23f0c0a6908dcc17 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9590cb0e237c0f2b7303+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9c1a7470b98b717e957dfc1990f816b2a5bef0c371d5a1d6a017e5e13826e7a +size 132373504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c9fbc36d42abbe4a4fba5bb6b829df7cccfbc9fd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c40b9a423327569ea3f731d7f94366e8ac985c8bc6472096b0ab785eb8a082 +size 628841 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..80a410a3ac7fef3bccfe18b1067c6ef7ad121dc3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_977bee6f36b120b2f4b3+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_977bee6f36b120b2f4b3+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_977bee6f36b120b2f4b3+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (19.581GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-05T19:56:03Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (19.581GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..943526674782a44c220ab1aa0274b4403676cb30 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d631ee9fd37e14151f29830356b7a11682916fc8746c3f6274333c81538e1e3c +size 628841 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..69b1f8b21f1b6fbef1242c6f0b0fe2754d9abead --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_99e9377627d0673637ab+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25faff5964484e532142db512b5342b1b5b1514ab7f67a2f1d71e31891a991bd +size 28417024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8180b85f333a4bea0853ca5a1715f1c6f7279228 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07f46d157553a8a739c6e9f5624d2082b1a1a0d09b34b31b5e1f658670c74f6 +size 840640 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..94fa9713c6a2f91471d626fa806985a64785d016 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9a666844f4acef43578b+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9a666844f4acef43578b+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T12:52:22Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..96b71721a78709b533b7fa24dccf5e6abff9fc4f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a832b3ba0b12bac416866610d955045724261ee33d6e31c56ee4810be9a529b +size 781521 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2b04a6146884f3941c2851822ed3bccc3ce85ba0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b3270c73bfb15154d87+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e18d0a88fcc2b254f0b37412fcd524591cb9b90b81efcaf97d3d1a44dd39a2 +size 41595904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8da02cc3c7ba3a93a31e0aa90ef3162a8aa93402 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d07d6c314d404dddf82480bdee8525232620f2259d260d3a36df9f565479c4 +size 859829 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..dbfd270dea265a632414362fcc894235e9fbb0c7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c85c604f278ac1fbc3a+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9c85c604f278ac1fbc3a+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9c85c604f278ac1fbc3a+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T09:15:25Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 3083442921144320 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-09 09:15:25.136381: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 7360512 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d3a7bb2d7eaa4d399fe2191aba411e1a8c1c39b1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4747986d49d64e7f2993f0384756b5e1c2de90967a683005baa6ddef238fb9f6 +size 846751 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..13f055373a3b47cd4083618da3055861bc124b38 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9de3d9acdfe88b9b1868+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9de3d9acdfe88b9b1868+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_9de3d9acdfe88b9b1868+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:45:26Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 1039038488248320 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 11:45:26.097106: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17890928130 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5e0d7e5c0a8e53423535a1cabe05e71ddb9d1c14 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a0c35681fefd173b040da6b1e5ef998a51eb234a6c5aca4735edf48854e1c52 +size 775142 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0efdd68e073d694aea2624aed3a6889f1bea9014 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a024648ac027e9e2c579+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382defbec55abfdedfbec0e9f3dc3cae25c504a48d2f07ee0361bcfb1a446906 +size 10691584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..406b5a5129369b33de963397410b0ca13bb612ad --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4338629fb3796c22a3fba7d5dc20957a4b434788bf4454331135ce47d7fa6baf +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..593ca9a9639a023918ecfd596acc64236a3ff2b0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a052a43168c011c69691+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a052a43168c011c69691+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a052a43168c011c69691+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (20.996GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T11:26:37Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (20.996GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cf413880e2ccb1896acbc655c7f28eb7e68cd9e9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:173814f91ca84d26c6831960840e82f42ccce0affb4cd9472e29446a8a80aa13 +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..10f10087026df16a972193fabd8eeaf662c0d758 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a18da554cd7fccc0540b+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a18da554cd7fccc0540b+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a18da554cd7fccc0540b+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (20.034GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T11:12:46Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (20.034GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..24c5e49e88686b3e72379106cff16b1e1f8bcc19 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:227fc2170727f24ca0fe429fc6487925326bbb0438d5089a3113a11e61b394f7 +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4de6faafee46f9b6074c292649651dd3b672df4c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a402787b8478f1fc5d77+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6731c8f569d6f3b824fdfe7d4471310f266ea02b3972a51e9dd2fd68b2eca3f4 +size 129014784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cd915a4a576c3629f01f02ae88f2770b17b91790 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d1e1b857c913ac50b7a96cbe4e7d9742412e47ce55ae083d53c525a3ebfd52 +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b3651d06597ba14aa4b77924d3698f4647f2709a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a45b47bc75d67a3051f8+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8ec1d3ab3aeb21f8d60152ca8cd3f18787f055f4ec622c162c6708f139660f1 +size 85689344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c0b754c997f61e5f0615ec4ad2a354add32a5879 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a70aff37b915d8d5c648866f14bf740cc985c93a43441c58ca6cba257778e4f7 +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..be2661a9fa56967bc304fde0ed27dca1b324d063 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a4c75e33226b582ed13b+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3effcb86799058bbf2cf83f08943889f4bd1a9d48842345a14536b2b85b6ee2e +size 87870464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c10fd3c08db40ddb0bec9e5292f676274d1519e9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aceda0cac58a1b6c59aa6387641e60dd0a22a7971fb0df6a0bb5633e82adf64 +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6639d8424f69cb656a766c50b5f85dd384ce6c41 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a7f908df5e06d25c8068+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f731be4f5b5f1225232fc08d4c3909c8d4ee6bdf26ca814a55a3417b36c597f5 +size 168776704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e91edc67c93d64519a7b9f2ff2c7f90f3d66c520 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15100ce39f26f16fcaddd136cd0bb892f850cf5d6d9d512795170a49b80bb21e +size 851650 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..48c0a72bcb64ebd2b6ec8700ea5f86fe456149fb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a920d77278c50f1829c7+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a920d77278c50f1829c7+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_a920d77278c50f1829c7+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (20.627GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T10:03:06Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (20.627GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5f0bf956db3d06f6fb01679d340e04571a1bb5f2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd95c6c3b85b32eee96c58ed5c9afccb7c059a5b408a0d846dea2a75fc9d39d +size 619281 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..92443cb68803c4d5d754bbb1d4409fb7aecc33e6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_abf57419dc4fa1e355bf+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6f82492624d3ed8f4b81d02e0254e685ffdb4d7210136f0f9e3e129091c7d30 +size 24597504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ba7f544ff0ce4685e6e1435b61b7175923c5c112 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e4a5ad1a29fc69d1d0b9865e90341f1949a2bd4f2a28d1c8a39c2ed41e0977 +size 626221 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1264f89e0987be4f4389011341baafdf459f7e2a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ada63286020c60a0372f+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:328de3e5d81b41675e00f3c6919cd9bc5a3e973419a81f807044414733b3eb91 +size 30147584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9312dd2382b7582a55214dbeb8c999c6ac258065 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b5c6d44187b2679bfa49ed9682578b9220f18b98a991180f7c19d1c0e50c33 +size 619065 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..36186bee599b4ff9c72175b249f4dbf091fdfc6d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ae3a7155c9cf865b3bf3+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6da604473413c4f09500ba4accf0a72d68de685f01e59dc8ae2d89d457c2ab4 +size 15627264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..adaa5c55013fc5f02cb6f9c7d35be9e47dbd2bed --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a43100720449b64bb211897dcc9ad91049212ce6c47f679f810861158c4a3f1d +size 774422 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3f052e909df7d5caa8ea26048c7b90f74cef4dfb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_afebefb7fd2d147fcab6+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:104d6574e0f283c497f9bb09caf1ce58cb458bbe02531063d7840b40cae65307 +size 143565824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1e675bf33fa1b6d90582658b185145c090978c35 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194246e4f61b065a989c95b6557fb80f7bebc4cbae498a64c0b73dee6f4f9a49 +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..8f480259af4d1fd838c050b8d9a9c7a1d7df6e2a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b2cae69a68a935040fee+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_b2cae69a68a935040fee+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_b2cae69a68a935040fee+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T22:01:03Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 551387901460480 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-09 22:01:03.639098: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22024989188 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..18acc4154cbf38c4aa1352a6433661639243b8ab --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a83329ed15d27b921b3e12f316c39cc253008d377ebee026ce086db24479ab +size 550218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6377ee1f16b77ccb821f37741836704c9041dd9f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3633511af08e66ed5e5+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d1bc1f4c0d2ef4c0c95249ce8f3cc4b158ade64ca875dd4050a89e26e663e41 +size 53627904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5b014acf8624d092db074a5cf2b24281494d9be3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0707d711c256cd0873d82c58f85ea29c1e9038e74d0779e9037877c264bc537f +size 839424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..02d7cc5f20e8fef9328936ec392c1930039450d7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ccc1c1e6b02608fbdd+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a14b33e36767751a74d8b1a7441b6e95fb8b7c0aea6802e105d205d5ee4b8a13 +size 28939264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8164d5252e4fb7d84c18e5b58889f238269139ec --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa29c402a1f1fa7ae1feaecd65918b147e28f0165344ce53024828ec2a25719b +size 846739 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..41332b86d0ac41900d87baedab47d0c38f3a9499 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3fedb970e36dce47927+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a343364333cb4e15b372d2e573e86a6d38362409de7870fcaf2b53936627d255 +size 23563264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f12cbd0924dc35dd832f7158737ebf59b2ad3ba2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c885e2099288ea6db6ae55e6a254cf3721cf67171201bafda71ecd3d5c3d7f7 +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..93a6a8c2650bdbfc10df3629f0bd24966c98ee3b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_b66cb711b2665f2307f9+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_b66cb711b2665f2307f9+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T13:16:37Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f03ff420a3042c8ab9f4ad3c2a32103867a66e97 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc258e590ae998ae64e89ab0e2830c96ca941ffaf8d619d07b796e7b80a7d5aa +size 866087 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..b8c8c07ce3c9efc3a22fa8d8371fa8a36682f67a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bdbb42ce2ef79ccbd78d+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bdbb42ce2ef79ccbd78d+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bdbb42ce2ef79ccbd78d+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:24:24Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 6166885842288640 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 11:24:24.204047: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22979837954 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e503d829b5d2aa8605a5c2b20dd1b2bf03a3bc92 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f97ff961fdd6692ffad8b49a4ab3ea173d45280f3f2aaa8433e37489c61295f +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..e59d264d173f6f49a408ee9c30c561b12160600d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf24b4e19296bcdf44f4+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bf24b4e19296bcdf44f4+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bf24b4e19296bcdf44f4+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:21:54Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 6003333487656960 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 10:21:54.840063: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 18388300290 bytes (17 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8708a21f2586ccde87e80b2168d4417624660a34 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05f27889c431ff3f43f322697301754e25bd65666daa4e3c0fe93401d5d04e14 +size 848819 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..5f524cc2de65462e3ef7abb07bd649db9204af7f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bf6b149e8e5c589b0317+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bf6b149e8e5c589b0317+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_bf6b149e8e5c589b0317+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:37:50Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 1500833371914240 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 12:37:50.269674: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17890928130 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cc3378be22d48882ddfa6ae4a41a0f1d48bdc359 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69239774522ac2018f8b08b1a63a67d96ad634f685e9ad21d9ffc88154a9bc0c +size 550151 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e95fb4fe08e108c302582e245d6aa85c674a42ca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c1d12458349b6c6d46c0+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb0b04810240c8f922c56e2963a91dc64b3fd5468e1b3a550985fc7fd6070918 +size 4148224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1961dfa58740b350fb86bc863c16b237890945a6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdb42072fe8ab4954f3d1be7f178cd0ce00bdc43fcb0e6532fa0db450bbe1ae +size 848819 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..cc9f091788d129bd1209b7bc8c6c17e88f54c117 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c241fa327ced3c459be8+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c241fa327ced3c459be8+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c241fa327ced3c459be8+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T17:24:41Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 770860730286080 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-09 17:24:41.125977: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22036949508 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b76aee176485875a79189a7c03afb5c22c4f237d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97002bf1276688b86dbacec547a468b20a39619ad01acdec7afe227d0588d258 +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2936d17b1908887f89585d43f13c36cb76e91d92 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c3d7a31c5ebb0fbecd0a+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fae1b5976f6f6a8be08768e85fb0f398b453be08fc257d13c8164f7af5229c12 +size 78746624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a946a12a2cbb48ed17cd934a1e0a844a96fd6c23 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:414f56935a5f34465b170e5bb706e4b699508fba33b555fec5464b21cdd2f868 +size 789219 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..07f83f062d22fc1c0373e9ca89361e33a5b02bc5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49437c751cde6d77d42+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c49437c751cde6d77d42+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c49437c751cde6d77d42+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:02:53Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 4819 + convert 1055 21.89% ################################################################ + transpose 687 14.26% ######################################### + reshape 478 9.92% ############################ + multiply 363 7.53% ###################### + parameter 328 6.81% ################### + get-tuple-element 324 6.72% ################### + broadcast 262 5.44% ############### + slice 255 5.29% ############### + constant 223 4.63% ############# + call 217 4.50% ############# + dot 181 3.76% ########## + add 145 3.01% ######## + concatenate 74 1.54% #### + tuple 73 1.51% #### + negate 72 1.49% #### + all-reduce 72 1.49% #### + gather 3 0.06% + iota 3 0.06% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4149 + convert 911 21.96% ################################################################ + reshape 902 21.74% ############################################################### + parameter 328 7.91% ####################### + transpose 290 6.99% #################### + constant 258 6.22% ################## + slice 252 6.07% ################# + multiply 218 5.25% ############### + custom-call 217 5.23% ############### + dot 180 4.34% ############ + get-tuple-element 180 4.34% ############ + add 144 3.47% ########## + concatenate 74 1.78% ##### + negate 72 1.74% ##### + all-reduce 72 1.74% ##### + broadcast 40 0.96% ## + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 421508090429440 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-10 00:02:53.813190: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22592248994 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d37ce3c238a9310643ebf0a80fb4512bcbcd66d4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69f82df38e230d68646f4e3a483402f843bebd5ea220b4a44db806f3037cee79 +size 781521 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..10ac8a7a39b79c2b8cf4b368141803a651c4c15c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c72c64369218eb78825d+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b56f524f1ffe52b3a27000929a908525604ad63a335ecfaf1df3fcaf5386330 +size 81316864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c3cba9180d9b617cb9b3581d7a707d008434fd78 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665829c36ab0dd72abeb33d0112b5a30edef86ce4f26d19dc44413c595dc8a07 +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..f8471e4a15a02602ad8b96d1bece63e3ecd959c3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c92d4f9d2dca0587d09e+fb4cc044/model.log @@ -0,0 +1,10 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c92d4f9d2dca0587d09e+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_c92d4f9d2dca0587d09e+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: Process Process-1:1: +Traceback (most recent call last): + File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "neuronxcc/driver/commands/CompileCommand.py", line 1328, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline.print_dots +BrokenPipeError: [Errno 32] Broken pipe +[NLA001] Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-09T15:21:09Z [Errno 32] Broken pipe diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b1c1b01ba1100a21964ca6a0da5a598b5bf1ecc2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7bf96f1908aeea39937880d245321efa55e88f12453236533767fef4730577 +size 775286 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b32954a74dd257dd454f71ed673eecfbab7f8e7b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb4521984ab2c0a81467+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c24274c84859318283c70099e65d1f5e255cd9169510089776865243e1f822e +size 40920064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0d53269032545c7560dc0704f70caadcc7243bfa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b146e0ba6174d46f79fae4f6be805e7983173b240feb0d76d1b2c0b185ed99e +size 849406 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..55c051b1df6889e1c8c36bc600fa30fe09385ea3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cc2ddaca0c662ddbdd97+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_cc2ddaca0c662ddbdd97+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_cc2ddaca0c662ddbdd97+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T15:03:39Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 1539316278886400 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 15:03:39.367814: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 35534210050 bytes (33 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a618461497bbbd25c554319aa9daf6efd7cba5a5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354eb59dbfa69b026dfb2b3398a9b2a549ae0069b84f3b1b4af1f9cfbc61daf4 +size 841224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..27331138f6131047d8af9e259090596856f60dda --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_ccac30ec5602d9f1f532+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_ccac30ec5602d9f1f532+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T15:04:19Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 269380348805120 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 15:04:19.605773: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17473487364 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a86cf25609f120a86e21c596c432ad330494c8c3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:285b84e101180c9d1f04f1506ae2f5509f08786bc0c8f05ccc562883b4025b46 +size 846739 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..35e957ee942a223e34a4e4830a66b54edca32abc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccf17d106921a01f8900+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f4b4fce665c219271072e4b7732c424124c0faa8cd6d642c0e0d7d843b8f83 +size 69643264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..57457760db29ff119309cb7cece252ec078cce5e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5fd467fd91f3b3dd73b6c8ed27b5f9ed16f2bbe3ae2f18fe7fc4cb4995f708 +size 851650 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..bdf027faa7758834b9a72ebac549d8ddeb15512e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d3bce6a0381735dc8108+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d3bce6a0381735dc8108+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:16:28Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 2078076976496640 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 10:16:28.780083: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 6670336 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9cd23ef3519b16f687071fff05d6010279d9566e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d13a7140909b86c181885eeed7df5c2b2d4dcd10f9a7a4a987d97afec5a5778d +size 550151 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..77da8599835a8c657c3775f789c8beb7f2005bae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4a7c9ec6145376f3d20+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c663c972116a5988617dd8e9506304f016cb4976178c90d6fd3d798a54f1c44c +size 7803904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8f757f067d6ae625c67089b650fbd02399793250 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78250c9bb88f3552089e6300b6e05295f6a35aa8a0b99238dce3033e69b04606 +size 857905 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..ffdf9e205a39e692f5fd7197257469e0593efdad --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d6835439f8b0403c5735+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d6835439f8b0403c5735+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d6835439f8b0403c5735+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:10:09Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 4156153952993280 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 10:10:09.541494: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 18388300290 bytes (17 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..692cb22597201e3da583cca101eab24bb8a296c4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cf5b2ce1c42b9ffeeae9ec8bd9d00115b48a1d54983088830f9a1f7aa3a9f8d +size 635925 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..d92ad308a83e71e31138a7e809f435df9cdb2273 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.log @@ -0,0 +1,7 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d85caf167e83e98cc38d+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_d85caf167e83e98cc38d+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (9282847). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5965912). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T10:14:43Z Non-signal exit. Backend exited with code 1 and stderr: [MFP002] Compilation failed for the following modules: + Module sg01: [LUR015] Compiler generated too many instructions (9282847). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + Module sg02: [LUR015] Compiler generated too many instructions (5965912). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a04054298eb25d2cfa7301614a31366738e4e5ac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9613053a6191cc0d3106c32c958989e02a5e2eb0b4c0dee71dbb64e55098ad2f +size 782241 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..16ad86b9f3ab266e8e32a87ffe4f50573bbd2048 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d9b4078117ed73d95bdc+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1258d9e9070c4e6fa7d65156dec671c682b1cbf800990350de6c4c61d168f5 +size 58133504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..24d0ea099fd931153f65d83999f557ed38d1c362 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4878546bedc065fcff030aceed453c669fb4c4fc8f903a648037421f5f1f41a8 +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..1494fe941fc0ff264aac05ca6dcbe8aae6089bc7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da0fb0cd2f564c7a36bc+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_da0fb0cd2f564c7a36bc+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_da0fb0cd2f564c7a36bc+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (18.886GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T17:10:17Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (18.886GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..28560bc931dbf56b229a82f2bdf1ef5d89bf22a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6e255f534053e0efd1ec470bd98236196d62091cf71477f2e46c069eb4c1d4 +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..bcc4b89198f9818e27c6ec2363d426f79f144b68 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_da9566f57ba46390c838+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_da9566f57ba46390c838+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T10:27:20Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 3001666743828480 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 10:27:20.729959: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 6670336 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e82bea3735bb8936954c63f979e35312338f4825 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812be1e7781179bb361b296418f22fb449ee922f041700670404587296e2b9f0 +size 859976 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..936a689cac57666618112dfa80f4249b30704a86 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_db2d12d6296edfd32572+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_db2d12d6296edfd32572+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_db2d12d6296edfd32572+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T09:15:02Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5755 + convert 1055 18.33% ################################################################ + reshape 802 13.94% ################################################ + transpose 723 12.56% ########################################### + broadcast 550 9.56% ################################# + slice 543 9.44% ################################ + multiply 363 6.31% ###################### + parameter 328 5.70% ################### + get-tuple-element 324 5.63% ################### + constant 223 3.87% ############# + call 217 3.77% ############# + dot 181 3.15% ########## + add 145 2.52% ######## + concatenate 74 1.29% #### + tuple 73 1.27% #### + negate 72 1.25% #### + all-reduce 72 1.25% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4365 + convert 911 20.87% ################################################################ + reshape 650 14.89% ############################################# + transpose 542 12.42% ###################################### + parameter 328 7.51% ####################### + constant 258 5.91% ################## + broadcast 256 5.86% ################# + slice 252 5.77% ################# + multiply 218 4.99% ############### + custom-call 217 4.97% ############### + dot 180 4.12% ############ + get-tuple-element 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + all-reduce 72 1.65% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 8629791888506880 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-09 09:15:02.622084: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22979837954 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fcd595558757ed4ba63503609fcec036d58de3fa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96e8b498776a4fbb8d9d7bc104bee53473c2a833583f9b6abd22a9f84c7efef5 +size 626221 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..80d195846dbb6fa307302c7be47044db0d5445c7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc6e27c8b152ac1dc9ac+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6234c0f758d43878b02cfb741bf081894bf7015f04efa3174f15eb421b201316 +size 50310144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..76b8ced83e79b411c9290a33a046f4dee6a6ffbf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:034a00212a21335810951cc0246a799eab1c8d66dccf458647360499210c29a0 +size 839424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1a6fcbdf9ad814b13c6dd579c15ca18c619878c5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dcd8c1453b43eed00c33+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775bec48b5559232804366e382f4e5fa33a52d4e94f63c867e0e04612f24dedf +size 17368064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4a0b9c692ecfccf0aeda59dbf506cc9bf77803fc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68db39f59e8daabb324bff25d173d4b205554807d24c90152ec070c7ca211b0 +size 847335 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..a38e49b6c649573413275f10a245b5b56ffcf4e8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_dfb053688abb949623b4+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_dfb053688abb949623b4+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T14:29:12Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 250138895319040 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 14:29:12.728144: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17473487364 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a12c86d76468469e9fc929d2dc22b24c6c53dac1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4515e5e8670a797a84bc3fef1412aa154920613b709b446030d5a366cf1b227e +size 848822 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..6940bd4ed4cbae95c8c77a0fb115c9f7cd87c2af --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_e0e2410f0584782f6618+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_e0e2410f0584782f6618+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T12:09:28Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 3001666743828480 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 12:09:27.928190: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 35616056834 bytes (33 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ba7ab511d00d8afe3079146ae23efbec4716e3bc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a209b2d9dc83dbfa67791a391e8379c8c0285b720767846acb9057962d6a2e8 +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1b853fc73eca6994d76396fed8f3313dbf48ef88 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e275893e058d2cf4cbe5+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9917f636b777a9a8e91d819bc8e6bc4141a57dc7b2e36ef62c1d3faf757e629 +size 137473024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..db9cc783c097cd967d3fffa60d9c2b1c4de54ca1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f53916a005720b5b71dac3bdeb66d73beac34fd28b8b69a899b700b9999f849 +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..27d2daef822f1829483b459a2dee6bf31522f55c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e535e65447dbc9579a04+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b597e1d53f4b8568751b028378ab36a349692e15e818370eab7b3b044dee845b +size 82648064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..24e353a9dd5fc6e650e482485a3d84f4fffea4d7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae24942ccf7f48c898aca83d0b972583aec495e76cb1ab8c28ec0e28ef5f80a6 +size 838696 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..12d6ddf795772de556d5463e06eaa69537913074 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e615497c40e4471f01e2+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09cb8780d9a84446562e9fe6cda7d1f72e76808f7eb5e3d471b5d13c68e8809 +size 7066624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a61255edfdcdffc33c86faa0432bc9eb2e866324 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31573073618141c38daa4cf73c88421971ce336e262a4957dc399fdbc5c64151 +size 839424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..9711f11c6bf32c53bf88cadbf4726e6d71937ffd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e666ad80f7f44ecc3879+fb4cc044/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_e666ad80f7f44ecc3879+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_e666ad80f7f44ecc3879+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5ea39fd7250791a69bf7bbcb54f1dfe26bb1ebe2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b96db805279e3ef7af88d669b824f9024859457bcfdb230fd3daa9edefad8dc +size 618478 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1546badd1ab07e99b338e42e04e1f562be084057 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_edac86b9ffb002bc0f49+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe554cd98ac6b2eed8ab46f385d696af279bba9b4ca3b9b341d8e8d9be9d9b9a +size 3175424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fee9d823cb0f55ae6e9355d6c1b5ceed832113e1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d0b73f2290b0ec62e4eedc330950c641a20abcb8effbd099f2b98bdcf3155c +size 838840 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..040d6f72816e499d137375d55bedcbc4029efbb3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eeccb8e8c987751ce9b0+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed597668f7cf6c784e20c4c5dbd2374e333e705aa825273abdeac5d5081612fa +size 16395264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..504ee628146e415b70e9689d34b95b9c3eb6afbb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0ed0a4496a56fa746d7da4f46719a638809411f6cbc81c4b97397b18da5d15 +size 854933 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..e457380664b92cb6e47ccffed44fe95e71385346 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ef1a1a009784d6730c82+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_ef1a1a009784d6730c82+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_ef1a1a009784d6730c82+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T16:40:18Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 1541721460572160 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-09 16:40:18.136125: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22668980354 bytes (21 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0cc181bb04b9f78cf089210b60d59880f5eb7b60 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:657eaad6331813faff32c8b7f629cbcec4a99a117868128225dab1204d801e8c +size 550218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0b44ed3c7070831c55a7d7cce31b6ed8e32d6fcb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f081706679e4afd8547d+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:590da2246adc43a4c7556aa95d9e60f2240592fa16debbefa06d00f30a900280 +size 56300544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..873c0e660b3923f0ae744691da1d3d87a0af8e53 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc410382b5aa7643170e0208839c95f062ed51f270b4af1e3d1db36d9209e50 +size 782241 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..539d8f278e3fac9c3551e74373157349e5d1d66f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f1438cf8178f932a3e30+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f1438cf8178f932a3e30+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f1438cf8178f932a3e30+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-10T00:29:41Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 4819 + convert 1055 21.89% ################################################################ + transpose 687 14.26% ######################################### + reshape 478 9.92% ############################ + multiply 363 7.53% ###################### + parameter 328 6.81% ################### + get-tuple-element 324 6.72% ################### + broadcast 262 5.44% ############### + slice 255 5.29% ############### + constant 223 4.63% ############# + call 217 4.50% ############# + dot 181 3.76% ########## + add 145 3.01% ######## + concatenate 74 1.54% #### + tuple 73 1.51% #### + negate 72 1.49% #### + all-reduce 72 1.49% #### + gather 3 0.06% + iota 3 0.06% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4149 + convert 911 21.96% ################################################################ + reshape 902 21.74% ############################################################### + parameter 328 7.91% ####################### + transpose 290 6.99% #################### + constant 258 6.22% ################## + slice 252 6.07% ################# + multiply 218 5.25% ############### + custom-call 217 5.23% ############### + dot 180 4.34% ############ + get-tuple-element 180 4.34% ############ + add 144 3.47% ########## + concatenate 74 1.78% ##### + negate 72 1.74% ##### + all-reduce 72 1.74% ##### + broadcast 40 0.96% ## + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 210754045214720 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-10 00:29:41.354684: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 22019009028 bytes (20 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e9134a813456c5120eb760640074733d8eee90e6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:941107baf06d12938b01214e5d05d5d16acdb5bf7f802526e594345c30db3377 +size 846674 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3668adb1c7a65d97b30fd022bdd03c2a82dbea20 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f42fa724ddb12e6e9d97+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91cd7ddf94aa9c8c2709e91ab41c02d9df24178301268d3419bc885225e6c4a7 +size 17429504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..aec4e5b364661c21605b6e40d03702c97b84766c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16c23763836dd26e919ad90ae3468b9b403cb7b5837ae6d78855492f82f7a6d6 +size 550215 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2fb1d48b66b9e50b5839c9aa4d37f8799f43417c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f511d700e615d9f77299+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7997d0baa4021ac950d1da5ec76554a66304b3df016b09d2b8a29442f06162d +size 27423744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c8e5192bb52c3572b85de820fc69c7c7f0d5e554 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:193f0170b2070508c29b73a34147cc0ef41093dfd392ad00c298c497851fec0e +size 618697 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..82112ee87078785072ed9e0fd1ff2d2138413583 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f61d8c6ee177e8e759f0+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18386190751b06343cf878b8ab1e08af41835421d704852237573633f6fccb36 +size 20337664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..09f1dfe2d8cf70564a73a07c239fdb1dccb57e14 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6554b9ae335bcff94287a3935413d73cf41cf2440680dd60901bcd5c2dfd0f5 +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0dd0ac8cee0e65ebf530fb315fad56983a43583e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f699344365cf7f52a68f+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c76b9993f58c863f53cb959b1abd9e72294a559ce0f3f31b61d85f48296e25 +size 39588864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9eeb4fd4a31b1daba0b3764925389510796513a5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1309d272d4b842791724416530cc80bd79c21a9bb7fd75aa2f091b23f3e32dc +size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7d9aa61c942100ee67867954d1900084a3464a87 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f90557dacb3ca5598321+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92b3329029acbd5b3a0d4d0d909a9f740b14880152428c73c9c746e53443592e +size 132250624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c20bc1f3b548bc8ebad0867a810ba9beeb3ddd26 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13aeb85cbd16415a535885cd6109ec2309073297ab6aeb89e0b06ea25538183c +size 628841 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2d9d210da823b50c28705ed8dfe03e527751e176 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93050c233c49c1a2125+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b417e8e9d242f6c0c0087657ea16bdea7b1d38ded56dc12bf71b94878a24f44a +size 64400384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6050a03830198e9011d816930f88af8839e4df77 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4ff14415dacf91d87cfa45303337ff5a2a75bc35dd144b7cef9bc0d321ed066 +size 625637 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..42e9fc8190f5bca8d78ad81a9637080762508df3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f93b45d02683e7e074a5+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f93b45d02683e7e074a5+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f93b45d02683e7e074a5+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-06T12:01:10Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..149bab8984e023fc8d49795d3ee6c5ada12e6949 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca45ae226e38b8ed0481e7a7f400c4fcdedc21d707078d6091d0e33a843023f +size 857100 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..781e54eda6ce8447992800b069a04a9723df4d97 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f9e37ca44ee2fbb4987c+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9be8cf37c5ae13220a17e926bae3a26fdf664413f3b6d8eba44878af2b4c5c09 +size 112374784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0531aec1e646c1f89f9fa638adbd3437f51e6ca2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea262273011cb56f83953d26495560b143dcab5450f902a3079e42b1722f0c0a +size 628841 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d81ef891d15a7f43296b4d411b104a188866a9b1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fa42cc49802250e87f12+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb8d363c05524b270d5981142b54c8964af339efe34a51bf60e7621d23250d5 +size 56259584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..41daa38fcdef711aee99d1d8fdfb67b95ce0dd9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b993ed6952483a9fa8b7809777b0a808de959c6265c8baf22fd4fde723991267 +size 846751 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..b1f977ab33f25e0ff77d0eb1429e36584ce56961 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.log @@ -0,0 +1,71 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fbb4295cd419661a4ed0+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fbb4295cd419661a4ed0+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:45:46Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 481036337152000 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +Replaced 0 dropout sequences with OffloadedDropout +HLO Ops used in computation: add all-gather all-reduce broadcast concatenate constant convert cosine custom-call dot gather get-tuple-element multiply negate parameter reshape sine slice transpose tuple +Invoking RemoveOptimizationBarriers pass +Processing partition 1 +2026-02-06 11:45:46.843430: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 17496556036 bytes (16 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..273f0211b88cb913016afedf51859c57a7c5b372 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65027e7f6d6b6ffe66f22eb81fd095ce1180f3f3c2f3815f136134cf73f433ef +size 854933 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..ca323139cfb3c3fd81497d3b463321d14f89b7e4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.log @@ -0,0 +1,67 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fc374c20b3ea2e1e5432+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fc374c20b3ea2e1e5432+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-06T11:44:46Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5611 + convert 1055 18.80% ################################################################ + reshape 766 13.65% ############################################## + transpose 687 12.24% ######################################### + slice 543 9.68% ################################ + broadcast 478 8.52% ############################ + multiply 363 6.47% ###################### + parameter 328 5.85% ################### + get-tuple-element 324 5.77% ################### + constant 223 3.97% ############# + call 217 3.87% ############# + dot 181 3.23% ########## + add 145 2.58% ######## + concatenate 74 1.32% #### + tuple 73 1.30% #### + negate 72 1.28% #### + all-reduce 72 1.28% #### + gather 3 0.05% + iota 3 0.05% + sine 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4293 + convert 911 21.22% ################################################################ + reshape 794 18.50% ####################################################### + transpose 398 9.27% ########################### + parameter 328 7.64% ####################### + constant 258 6.01% ################## + slice 252 5.87% ################# + multiply 218 5.08% ############### + custom-call 217 5.05% ############### + broadcast 184 4.29% ############ + dot 180 4.19% ############ + get-tuple-element 180 4.19% ############ + add 144 3.35% ########## + concatenate 74 1.72% ##### + negate 72 1.68% ##### + all-reduce 72 1.68% ##### + gather 3 0.07% + iota 3 0.07% + sine 1 0.02% + tuple 1 0.02% + all-gather 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + +Potential split-points stats: #CC 73 #AR 72 #AG 1 #BN 0 nClamp 0 +ModuleSplitter initial partitioning... #parts 73 +ModuleSplitter initial partitioning... Done. + 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 72 +New disjoint wave: start 2 len 70 NumReps: 35 macs 6003333487656960 +First non-zero-mac/used part from the end is 72 +Not enough zero-mac parts. skip +ModuleSplitter initial partitioning... #parts 37 +ModuleSplitter initial partitioning... Done. +Remat: gather-iota 0 matches, 0 ops rematted +Wrote HLO netlist to hlo_netlist.json +Wrote graph partitions in debug_info_hlo_partitions.json +Processing partition 0 +2026-02-06 11:44:46.083167: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 71066314242 bytes (66 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4a9415195c7fb1c0674fe8523caecbd16e6a002a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0600becadf8552fb59594f02a14f4bfeb250fc3194d33027ebe09840e3affcd1 +size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2c94374a1bfd4dc955179c0dfb93801405205c2b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fd0e003c65277ac495d4+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918e6483fa8a5d2cb9bb96eadfd58c9ab4fdcaa3b24bd3c11a7c2c793026a228 +size 264336384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0361a27afd2d6a653d13896640c0cfefb7d46cda --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb22832d48af2067f57308dae569b77f374614c0ddb1f02a217b3c2a1b6bdf21 +size 550227 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6f6ca546a3355432232e247a5d0bf03e1c7a4262 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdd8f98956e8286e2bbd+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1569e87331d58ac09893a7e7774f29170a805dd46b9c630ba04fb2f9e30fc4e2 +size 66939904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bd54c6b6204baca08d172b09e49e68edc6590457 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371d829661988bdab0d0b51034566239f4e5700d09dfb13785baef443276e300 +size 841224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..47f7d15512443275481d6027f25410205df2eb6e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fdfc889d20b9ef751c6c+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fdfc889d20b9ef751c6c+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: \ No newline at end of file