diff --git a/.gitattributes b/.gitattributes index cb83a0e22c421853fc57ade283ec4025378168d8..c7a5673139db6cbf3d1955a8eb82e24b1a05dcdc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16456,3 +16456,12 @@ neuronxcc-2.21.33363.0+82129205/MODULE_c0300a21335f2118973b+8bc117fd/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_c0300a21335f2118973b+8bc117fd/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_fa79bce2a7462b20ad61+c899ece2/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_fa79bce2a7462b20ad61+c899ece2/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/0cbac419f8839d90c6bafb19e17441f1a052e93e227f0fa62918ebe7d882e225/88fa555c8659e3f7fc5a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/0cbac419f8839d90c6bafb19e17441f1a052e93e227f0fa62918ebe7d882e225/88fa555c8659e3f7fc5a.json new file mode 100644 index 0000000000000000000000000000000000000000..bd4e31d0ae2989c96fc2ca341df12c05087490de --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/0cbac419f8839d90c6bafb19e17441f1a052e93e227f0fa62918ebe7d882e225/88fa555c8659e3f7fc5a.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/49052c1d22e88f219887.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/49052c1d22e88f219887.json new file mode 100644 index 0000000000000000000000000000000000000000..33b9c636176d82db688daae4b39e8d27c195b46c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/49052c1d22e88f219887.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/67070f0a4d500338d5aa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/67070f0a4d500338d5aa.json new file mode 100644 index 0000000000000000000000000000000000000000..7f3fa8e8495bf3d59aac9f79512d617ebafbbb78 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/67070f0a4d500338d5aa.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 4, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/99d672cbaa2e018da4e4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/99d672cbaa2e018da4e4.json new file mode 100644 index 0000000000000000000000000000000000000000..804bf20471478f848a86eb7f7fc5eae61667babc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/99d672cbaa2e018da4e4.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 8, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/ad354086b250f133c9c6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/ad354086b250f133c9c6.json new file mode 100644 index 0000000000000000000000000000000000000000..63ab76e4d60875c04dd59a67df04fe91a367ce46 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/ad354086b250f133c9c6.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 4, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b8a28a3ac7bcba98b595.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b8a28a3ac7bcba98b595.json new file mode 100644 index 0000000000000000000000000000000000000000..895ba4173f67deaea8634f138c28636991bdde0b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b8a28a3ac7bcba98b595.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 8, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/fa7d033eb1dca73fe9c3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/fa7d033eb1dca73fe9c3.json new file mode 100644 index 0000000000000000000000000000000000000000..9f1d84d7bdeb2c815be9f0105218a0550d5f283d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/fa7d033eb1dca73fe9c3.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.hlo_module.pb index 0ac971d5324bedd274c62297dc1457efb177db91..42ada8319da0a82c5ebfd6eb3618de58c90fca30 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a3939beb88e18a29e1e+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49dae5bb2894dc0c7ea22a320b4b2de1774e3f5b385da2dfd4bee2d6f0ab9d0e +oid sha256:f4962df721f1ff99db7ed9282a0413c045e2ed26f72534f5bac1b8e92cc90ab6 size 855517 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.hlo_module.pb index 182cdccfcc22d9762425ba6c20369b9ee9741301..b4b5fe1eb74b8d09ecf171b033452e133862dfdf 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1a41189de75fc0ed15bb+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ae3632a5072d91624b76d190b5435e08c58ad0d694bbb070627e66714c00d34 +oid sha256:30d9f50b25c8ca835fdfe8f59ab1803840811b018c6c0ca09e8cd8e73ad1cc23 size 854933 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.hlo_module.pb index dde8ce32a1a0da51d58eba59b673ea1dafb478cb..78feb0839a635c4bb1d754e527dd1586cd660f10 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1afb27ccf10708e03947+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6c082016fb78c0694a6b26131ba3769d0366bdd1be75fcf052dc0599f9da811 +oid sha256:fe09b160df7c75422ed8c70794cba8f700532ea5e055a888f3218d4ddc133cc2 size 847335 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.hlo_module.pb index 9b55b8fa90c1855e7eb820a015dc4b33f02d4413..4a23078cb2ab5327a18367d13b97d3b3e28d458e 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_269a21b3e3cb06fc787d+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce7965a675031fb6ab8179087e299eef62fde401cc47e3276351e8e7ff0f2022 +oid sha256:510d77f70ca853cb4b1c6a5c212deeac8665a2247e19d253089cc6baad1bfe5e size 848819 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..15be505c79e4d44819768a50b906df0e6942dc50 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6586f1806f42425bfaa3fdfabde1ed3e812e471c3fb402ff55d086a77b2fd77 +size 964631 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7a26edb59389a7a9850e94a09500254158b8aff5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2831e4c199ca2002f484+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872e6e30a502e8a2b1b521ead33019131caf567e16724954de06860e9920423a +size 9585664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..271bc1b14d4cf6235a1deb287fb12bfda1a052ec --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:332badac6d5178793a73d00fb629bea96371d302c0a2c06dd5f7b839fb48ca69 +size 846674 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ea9d9c2bc94b8f6c8cecda557f6f9cb7a072b836 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_35958e2f8cf19e5eddc6+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d30fbd63a17bc6b510ec148961ccb47b7572e3d23faa33b5bedc8ccb1257ef +size 18801664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.hlo_module.pb index 57acaadb781deaf6b04bac7d3f18657f4f6ba8a6..9e6685e3369137551dea835a8ce2fd6a51a2db52 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_54baea067a19d8c74866+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ab1648fdab97bf68365e1aa5973e5b6031c2705415d4fd1b323d85ddf5087ee +oid sha256:294ed9b3de40b9ff7fc577e30fca96ed67140aaf4a582099c8372ca196a5d0a8 size 859976 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.hlo_module.pb index 9382e5f9ef4eac58b5082a9e63144451d90dcbf2..4693ec4cae5c5d8117b8295bd1fa2153ac0af336 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c4a6aceb0aa544053e8+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9afd9b922494f37910fd123b49a349abe77e409cd972f1235a17fd3f9528a78 +oid sha256:052b3e66105967fa30b2f55fcb561b2c9006d27f850cb49cf74c4bdbfef09d5c size 857165 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cae2f7c0c8c12232189de51f30a33cc2b3005193 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be6595e498a428334b25dd2f1c128bc34c04dfd8b3f7ddb7b08c1124ebce3b3 +size 978680 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4ab761f2fde39a3c1935c283b6241560c1198cba --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52670f09ed33bad9c7f72e08060928342d842d55c0cb3e24b2976fa91c926b98 +size 4680704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1917b2c01670c9f9a0276c6db5f83f0c45db2413 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5dafb05b2fa15b606ad4+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836aba59ceabd1cd7e53d2b75579736fd1bbf20405f14c62fdf85c56cbcb312c +size 4855395 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.hlo_module.pb index eb67db5b3314f4eeaa919eac004969a9f2e1b10e..6956bb145a833d357ad0365748ffe20493b7686c 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_63b228a1aedd2e24d261+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5a83949de28fde124a6106707f8c43c8434bb6c9497076254c57018052a7a49 +oid sha256:64cf5020409415c8e7ac78ea6885a166b6fedefb10771a68c14ba7704c559b0e size 635925 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7c5e62142147097d666a8304ca2c57fff945fedc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe53cc824521d9307b6a7bd6d11b686fd303c8fde5836aaf2486c21598a2108 +size 846739 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fab708a4f4c5ad103f1e949a6e8b5180fdf93e68 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_64886f8b7709113eb14b+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f93438c0e69e302e132b757c2d8cd6cdab76bf163ab872cf7493f09172ccb1 +size 26665984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..58ce2a894208e92dd6e4bdc104b0747077b16e33 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843db1ac96310937ce94dd8aa7867f17bd07233477d0bd666d88c861ab033274 +size 839280 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ae693e51867e1e168563d3f54a368cf2a0e38dbf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_758311bdb6e777cb53a2+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b99e257a60373b0a0fad68c538aab65902f9519537fec23a7e65fc72d329da42 +size 17705984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.hlo_module.pb index cb068f0ffdf608210208ee83970889db5f142a75..533408bb97e94bc231c77dbd6cd7571215ff9cb8 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81503bf195cd5d2e22b3+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:155c6e081c7eb1c61ece71fa77412b3525882a84d8a82f0347908930458bf305 +oid sha256:7dbf1d0c8b099ae011b2f2c6c9098ccad692b88500c77060395fc45117bb57d6 size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.hlo_module.pb index 82c2a61b1b5e5caaabca5f77cc78cbef3da4d8b9..e00cd8f073875ce20b0a79519468b49bfca88952 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_87ac40e3234811a067b9+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e45c384167132d812fe4c7c2ee3cae54466a96795a354027643ec4f3d9650f8 +oid sha256:d88d7a8112d0c699bc222e2840023091b282cc19dbc8ec4db56c2f16b3ee423d size 840640 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.hlo_module.pb index 52330a0442e132bb0ac847101d36f9af526856a8..c151ba9c8d327603a091a8a0d4f720fd432a9ca6 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d8d38359e9e9f052a77+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:193b34e4fbfb73b0e96c027774b7518fc7978c624a2c8d92e76a7b6b110bec34 +oid sha256:5a2529d821ddf64b20d4049d1f86abfc2fb1243068aac24799f77eb0ecc109e1 size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.hlo_module.pb index 8180b85f333a4bea0853ca5a1715f1c6f7279228..a2a056c8bb9e9fdfbddd5244b26bc2556ec28e48 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9a666844f4acef43578b+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e07f46d157553a8a739c6e9f5624d2082b1a1a0d09b34b31b5e1f658670c74f6 +oid sha256:22b506d19399abfcc0502e27c2eed2a9e9f03b475ab85185faa2d3cacbfc5b81 size 840640 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9448436f407f9041197f76553ab1f0d480dd817f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0b9c4ee4906c651bfa256315106235b617658068447866ee0587cfd3435969 +size 846739 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..25e72627002c076f3d627ba3107ec45daae5b220 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aaefb5ede5ee0772566c+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1947873efe9a373e8016e55285025c7e1dc5af4d006a4b885a1927e4e1e416f +size 75367424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a7639f46d0e603b94e545b2283e8771b7eb714fb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a796666ba49f04593eaab9a45bcc19a3fefde4129103779a7c65887c9ede250b +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a9036aa96f0b90cf6b07955f619ce086ebcd2f77 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b0cd0a9c4d91cfdefe87+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88cef71738055a8afd22c9bcb71943a1bcb314cdca7c2bfab354c8e197455852 +size 53105664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b234477c449ee671d3c1+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b234477c449ee671d3c1+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b234477c449ee671d3c1+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b234477c449ee671d3c1+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b234477c449ee671d3c1+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eafac9b87c282891226cca801858cba3ff7bc0ca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b234477c449ee671d3c1+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f55bce31e2660b08a6b612b7aec51341271e6818bb861f8b484603c45b6cebc +size 847459 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b234477c449ee671d3c1+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_b234477c449ee671d3c1+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..fdd4a4e311d49c69031127c66f7c943673743a90 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b234477c449ee671d3c1+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_b234477c449ee671d3c1+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_b234477c449ee671d3c1+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (19.189GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-10T10:06:01Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (19.189GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.hlo_module.pb index f12cbd0924dc35dd832f7158737ebf59b2ad3ba2..ba749086b6c34054d00da4168b157acd25a6db99 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b66cb711b2665f2307f9+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c885e2099288ea6db6ae55e6a254cf3721cf67171201bafda71ecd3d5c3d7f7 +oid sha256:0156c0f36833f8f1b97a41334bef9eb49ea095defe2649de56426a5e7f3f58a6 size 846875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.hlo_module.pb index a618461497bbbd25c554319aa9daf6efd7cba5a5..9b4ae74cfa4b6fdf884f440df857590e1ae34368 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ccac30ec5602d9f1f532+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:354eb59dbfa69b026dfb2b3398a9b2a549ae0069b84f3b1b4af1f9cfbc61daf4 +oid sha256:d3d9458dea28dce8ae4d19abe35f26ec4865b984b0c2b6ab0032b34331b380bb size 841224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.hlo_module.pb index 57457760db29ff119309cb7cece252ec078cce5e..45fac5cbe24631976ffa9f65b82f2fabf9bdf538 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3bce6a0381735dc8108+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d5fd467fd91f3b3dd73b6c8ed27b5f9ed16f2bbe3ae2f18fe7fc4cb4995f708 +oid sha256:0a084aa0d2e12efe24048614a717b57e55527c719d766fac1d99033969705ef3 size 851650 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.hlo_module.pb index 692cb22597201e3da583cca101eab24bb8a296c4..a0c60ca7f65e0086769227b68e21a6dfd18a350a 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d85caf167e83e98cc38d+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1cf5b2ce1c42b9ffeeae9ec8bd9d00115b48a1d54983088830f9a1f7aa3a9f8d +oid sha256:5acdb5b3525c7db99d322de1f0da9f7489354778b8225b6796abbc47174511c7 size 635925 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.hlo_module.pb index 28560bc931dbf56b229a82f2bdf1ef5d89bf22a7..3891dc5886224346733534707443dc5b8017189f 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da9566f57ba46390c838+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea6e255f534053e0efd1ec470bd98236196d62091cf71477f2e46c069eb4c1d4 +oid sha256:7206790a65402689c85dd551033931435fd0e54afe8145fd8a4f825ec04f12d5 size 859109 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.hlo_module.pb index 4a0b9c692ecfccf0aeda59dbf506cc9bf77803fc..43e11007b98c28c8dfae49ee0210da697a2ea1f5 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dfb053688abb949623b4+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b68db39f59e8daabb324bff25d173d4b205554807d24c90152ec070c7ca211b0 +oid sha256:4a56050dce420528119cd9f8396a5369a536cda48b4f37695bc2fe8c7e8bde6d size 847335 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.hlo_module.pb index a12c86d76468469e9fc929d2dc22b24c6c53dac1..683a062fa80afd7d76a67cac554b8213e68724e8 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0e2410f0584782f6618+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4515e5e8670a797a84bc3fef1412aa154920613b709b446030d5a366cf1b227e +oid sha256:715832f65e2f2dbb3d87147f5cd0591da3c1edec20bafe35fc3d67d379d13c14 size 848822 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3b84855c0166c5de09116d9d04fd68c163491fdd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0e97b94bcfae923a63bdc93deeffbed73a6439fc846050281353c150ee0fa7 +size 839424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f35547d237dc802f684e3d8003087c81a775644a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f81dd9a0c4854a438c30+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec087c88fe6dd92637cb33f9a2e530f4fa11db959d64712b285ba25882cf340 +size 20368384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.hlo_module.pb index 41daa38fcdef711aee99d1d8fdfb67b95ce0dd9a..8966143abe960e0f5f70b885fa2622378a4d638a 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fbb4295cd419661a4ed0+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b993ed6952483a9fa8b7809777b0a808de959c6265c8baf22fd4fde723991267 +oid sha256:a5db0b8cb335a62d213ccb8d6a2ce7e7ac6b0e9d5af553eeb2ec3657ce20a4a5 size 846751 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.hlo_module.pb index 273f0211b88cb913016afedf51859c57a7c5b372..1c2ecc0b52c79985efa7d15a8ca034ad2109e848 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fc374c20b3ea2e1e5432+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65027e7f6d6b6ffe66f22eb81fd095ce1180f3f3c2f3815f136134cf73f433ef +oid sha256:443778a0136e168f520c56f7209c76ec817d0de0f5084b549c8cb61cfc0eb67e size 854933 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.hlo_module.pb index bd54c6b6204baca08d172b09e49e68edc6590457..539e354c2839231c5f057905dc7ff79d39be9690 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fdfc889d20b9ef751c6c+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:371d829661988bdab0d0b51034566239f4e5700d09dfb13785baef443276e300 +oid sha256:a2ff22dd689c9eef53b0701382067dc357f6b985824813b741dc5a885ebc0a14 size 841224