diff --git a/.gitattributes b/.gitattributes index f2b972648491d424ca1f3cac8ed5350aafe3eae5..de921ac7e90936eb8052485fcf53e1bc84f6aa3d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -17042,3 +17042,11 @@ neuronxcc-2.21.33363.0+82129205/MODULE_eed91f115fe8c2176712+4394b9d6/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_eed91f115fe8c2176712+4394b9d6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_efe24876903fe1364678+3ad1cb98/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_efe24876903fe1364678+3ad1cb98/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/7ba8c743ae51687c222d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/7ba8c743ae51687c222d.json new file mode 100644 index 0000000000000000000000000000000000000000..815cb54514f18d1015e26f2c8d91829813afd787 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/7ba8c743ae51687c222d.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/b38c4dd37be80535e108.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/b38c4dd37be80535e108.json new file mode 100644 index 0000000000000000000000000000000000000000..e6319ef2793b2a63a3b22a7adfc5c48f7e43dfc6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/b38c4dd37be80535e108.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/01ab39323144501d6eaf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/01ab39323144501d6eaf.json new file mode 100644 index 0000000000000000000000000000000000000000..b284e04768fcc38cf434216f724902e909ad3074 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/01ab39323144501d6eaf.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/a28e46704c048d210631.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/a28e46704c048d210631.json new file mode 100644 index 0000000000000000000000000000000000000000..031fa625bd3068e8ec65e76de1b37c4ec982c8de --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/a28e46704c048d210631.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-0.6B/a28e46704c048d210631.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-0.6B/a28e46704c048d210631.json new file mode 100644 index 0000000000000000000000000000000000000000..031fa625bd3068e8ec65e76de1b37c4ec982c8de --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-0.6B/a28e46704c048d210631.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/b38c4dd37be80535e108.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/b38c4dd37be80535e108.json new file mode 100644 index 0000000000000000000000000000000000000000..e6319ef2793b2a63a3b22a7adfc5c48f7e43dfc6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/b38c4dd37be80535e108.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff index ba27f47bfe4b7e123c853f25f9f3bb2acf6894bf..577a76abcfb57fa5cf682ca0b7c4e2c6cbd765a1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.hlo_module.pb index caa215de9ecc819b26539ea04f6d7b719b525bba..d50f9ba71ba91db63c4cd58e5aa80d9a1bf5c57b 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d18c81c6ab7ffca4593ffd12280271d86ca9a2700a2770da93fb314a9109ff67 +oid sha256:b1e617c5569524a80c9f6d97e87ad348e3c21889de715297f18f50d886f1e81e size 509380 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.neff index 52e5de160ae6eb6f0bbcc4df34d8809b957ea935..b5ca6aa9438f1f3f03ba3a23be88f6fa4049267e 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:45d13b5a6f47ee0e1c7c9b783470687dfcad53929b4035be0b071bbacd718948 +oid sha256:2586baa46365618b851f6bc750452d003d50e54b8b7734e3bd272eba298d7065 size 41585664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9c277888420f00defd99fc3c102007a98b09199d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..68236c94c84fd79eaa2a34b36fdd3508cd3a3bf1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc433e7b703d130e091b01dadff3c52a79577463c64fe2bad4743e10fb5ed466 +size 1011733 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cfa34d4441628dbbb07d894ea8d4d8004f8c06fc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a42b4aaa6cda83c40f5f7d37993ca0c5a1837b5cca852e0144cd487ad3648e +size 40387584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6362418c049ff77b2d100545feef6d31070fbd24 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb21ddc44f42e0a984b9c1c6b453bee69944f16edcc891c120a1bf6fc5006494 +size 719476 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.log new file mode 100644 index 0000000000000000000000000000000000000000..90998c663a82b92df5b85a88495e5b9687ad7a58 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_1de7a333d337fe35cb9e+24129607.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_1de7a333d337fe35cb9e+24129607.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-03-10T14:33:04Z [XTP004] Number of instructions (6729320) is over the threshold (5000000). - Compile under --optlevel=1 to create smaller subgraphs or use pipeline parallelism. diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d055815ac94bd4faca6694a9bc22e0bd748289aa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2a056d98b98672d37a1e08b48bf6f37953aba9b4c616720de52a80dbeeabfb +size 693532 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8b0a74db292445e1b22bf34d07a1d4413b2296a5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a605026a3de07a515ce0814ecd12488cf6cb11c72ed4113ad5cdde2ebdf566e +size 5039104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5bc4c19719fbafe438244a5f1204bf24fac82584 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b82959150728b621645262435120008706a4ef119a943f7db4a6d90aeb430c7 +size 728309 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..86d30e9ea66844dc56edd30819f135110e69b597 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed190bc44698b591685d8b6feb0da87c5d1629abe304ab17569a3510a1a0fab +size 7117824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a24e23ceaddc278d46e695b48c368a46dd615943 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e34e480fdf7a4270bca4077b8ded8f08218d466c960d9cd822ee3690dc8e334 +size 7264840 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..764f8c0765e11597ec042734d6f8f97960ec11e5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_69445d02-1e2d-4f77-9ed8-c9029402b637/compiler_workdir/SoftmaxWithMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f2ee42eb9f1aa790cfddd8e066da7c8aeb1214d9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f96487dad4bb02b98bf2c955fe59650a5fdbcf1d763fdf56ec412b62b5774c +size 5596 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..807d2388b97699c4f9704e844acbf16bbc2e320f Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f88e472e8013aa2e0d28d9721a147bdd17f8ee44 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eefc21dfba0ef82c19273eba57dd5a0bf0554faaa2d39d11cd99f0c330170b5 +size 735532 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f2be98c580c6b287aa37064073ffa2df534624ad --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:120cbfa9f71dd5236635bc11c0b1e835fb832b9c0ec11ab84e350f086e92b1f7 +size 2305024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..048cbb58b405ec7a641d5d2ef197bd88ae630363 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842619c244312132fec3c4e8bc1ad737c011b8de21b6e4a28657d07f6e0b5dc3 +size 2442081 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..20aa22efb9b7bca03f3161c9b3e5fb27a6661115 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64981f2c590ab72d27b23aa6dbaa7507715d804527dff4824f5b8a704b2e29a8 +size 426769 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..84efe9938d42fd7f5ee025dcb86b61fe9214b9ac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f7ccb3c563a4c0fdae234892df68dcd6c60cd41674bf983c7a5693709d2af9c +size 3073024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b61449e8786a2d6f5af41ac47d0f24a21d57ed1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a6ac7c0556c0e218f9e95cabc999fa5d73cb79aabcc96ccbc57e3c5325e36e9 +size 3147125 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0a47616421b5397f34eacbc54aaf8f41a1c5c656 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_4e549635-b641-407e-bfcf-0de9fb256e44/compiler_workdir/SoftmaxWithMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1122d44653d734af6a44cd149690f70cdefd9b05 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9a258ce111db27dc5ba46ec4f9f6877c56b0777a5534c0eba14488ff9eaf298 +size 5596 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f9c893c3062fd55002fad8ce18b02eb3664bc5d3 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..b11c4c2027752634eb93271357606494d009d3cf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_de4f3c0b-dec2-4039-854c-30a1f038d511/compiler_workdir/SoftmaxNoMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..36897c006be58fcc135059831fe41ce96ad676a1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e542c76cf27c3f807feac47dd3f58fd4c308e8d9e932f0732774bee19d640d8 +size 3881 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2363eef198b54da332e53dbd6b6cbc77264827fc Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3de30b75961eefc371bd9e991f25a93b964e6044 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_1a5ee6fa-840a-4fa0-b1c4-3b83c44912b0/compiler_workdir/SoftmaxNoMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c213570f630857303e7b0ec653018546c02dc983 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5efb0ea01dc0241314f91e19cbd9be551d1a66e7dbadd7cd4d6473d6b7d4b99a +size 3881 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c7e44e575c3f151ebb1529a3b6fcc65563f33547 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/model.neff differ