diff --git a/.gitattributes b/.gitattributes index 2e8d203e94e7151833df944f6d346c7c7f892091..3d0e1a16869ea047e7e6d10c7811523dc33c8e04 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7234,3 +7234,14 @@ neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4e1fa9712696dd29a0bc.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4e1fa9712696dd29a0bc.json new file mode 100644 index 0000000000000000000000000000000000000000..e5d294d4c97e7f78b6b699c10331f5f657492f5b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4e1fa9712696dd29a0bc.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80514be9a02fddc9c476.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80514be9a02fddc9c476.json new file mode 100644 index 0000000000000000000000000000000000000000..f00c4a0bb828a4180c48699e7c2fddf95507236d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80514be9a02fddc9c476.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-Embedding-4B/80514be9a02fddc9c476.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-Embedding-4B/80514be9a02fddc9c476.json new file mode 100644 index 0000000000000000000000000000000000000000..f00c4a0bb828a4180c48699e7c2fddf95507236d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-Embedding-4B/80514be9a02fddc9c476.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/96baef0f2a01e5e29193.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/96baef0f2a01e5e29193.json new file mode 100644 index 0000000000000000000000000000000000000000..5466dc631cc2f246170655238a0c25aad8f4307a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/96baef0f2a01e5e29193.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/d73c51dc7dd75010abe9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/d73c51dc7dd75010abe9.json new file mode 100644 index 0000000000000000000000000000000000000000..f948613657d453c42fb81af5e98ad60c8d41b3d0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/d73c51dc7dd75010abe9.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d182199b5d53c98ec562.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d182199b5d53c98ec562.json new file mode 100644 index 0000000000000000000000000000000000000000..73ba63116facd245ce64bc19bf625ff1f2161c18 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d182199b5d53c98ec562.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/24e7f0a205508b46b0eb.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/24e7f0a205508b46b0eb.json new file mode 100644 index 0000000000000000000000000000000000000000..bab7ea4566489d400ba79fb05a47d27eac9d7bc0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/24e7f0a205508b46b0eb.json @@ -0,0 +1,96 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6e2489df1bc1e5c5af63.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6e2489df1bc1e5c5af63.json new file mode 100644 index 0000000000000000000000000000000000000000..56b79ecca01c21f73d716b552ada5438c3c4bfc7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6e2489df1bc1e5c5af63.json @@ -0,0 +1,96 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a34b748f9038b1e376dc.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a34b748f9038b1e376dc.json new file mode 100644 index 0000000000000000000000000000000000000000..50374928356cdd91bb5967eee4db47a3202c0c7d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a34b748f9038b1e376dc.json @@ -0,0 +1,96 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/d73c51dc7dd75010abe9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/d73c51dc7dd75010abe9.json new file mode 100644 index 0000000000000000000000000000000000000000..f948613657d453c42fb81af5e98ad60c8d41b3d0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/d73c51dc7dd75010abe9.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-4B/24e7f0a205508b46b0eb.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-4B/24e7f0a205508b46b0eb.json new file mode 100644 index 0000000000000000000000000000000000000000..bab7ea4566489d400ba79fb05a47d27eac9d7bc0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-4B/24e7f0a205508b46b0eb.json @@ -0,0 +1,96 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e7cbc7270350107f4b64ee37b6ba6b22c5a5f716 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1418a3bc01d777bbefb8b367e96770bb02210c82476f56c4d78293cc1a7ea8bc +size 848773 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7739dc0b2f58ca9d90f63b99d97b6d36febdc287 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc69ce1b547d85de7a704e3830e155358ac4147366aab81cd690f7274405844 +size 4619264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..73eaaf04f33e9cfc68b0b3cb521d289e8e91eadc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c92837716166f7cb39c6297bc51bacaccdd8471f7a7cf4eb5a90aa139ca3f28b +size 628810 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7a5a496c55ba7e8f779cb8f1120bd09ea8404bdd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e794bd527bf159e760f7e574be86936f0708fb508abd78488a3d208f0ae63803 +size 22672384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7f49f996cc53517f18a43200af6e2f3a21c0a19c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8223a0880f04437df80f89de1868b68be53988eb9a109c31fbf2423e0595ab55 +size 684041 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..be1a1b9f03fab3b4329a247380dfff3446ffc8a9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31b87a1ef2e12d7cf6675e391738773cafeccfb746d2312644814bef9e634af2 +size 7732224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b17abb702e612bdf4e30cdc70f615b9d8409cf17 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1eab0e1e36ccc80e01b9fc5973caceec04c50f7f81d61ab27925c6145addeec +size 412156 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bb4429a9f0e71b6f6c5a28b1afa3ddc503bdf8da --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cde39dbd1fb71e45804b6ac56f97815205e6a7d6a7deac0af9414593434681b +size 2796544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..422b1f5f03645c1e36c9165063ade20f03ffa69a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d79d2330194768dee128be01cd0cbea2a63911f50e0c35b64ea4960ea48c493 +size 848917 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9d4ba28bdb22cb6f11865d7811384b31a04c07a5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1571335377de3b6f476c55def0ba070f37fddb4a06253cf0f8b14da0e22e80 +size 11254784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb index 5bc4c19719fbafe438244a5f1204bf24fac82584..00c7273a49d92c32e40a5da3f8f745d07f7765cb 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1b82959150728b621645262435120008706a4ef119a943f7db4a6d90aeb430c7 +oid sha256:9d89321cf197c8909f26412855cae21850d77301d0ab013b344fb30575a4a7b5 size 728309 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff index 86d30e9ea66844dc56edd30819f135110e69b597..ac35d7514d3a35626968ccc0e873c23aedca5e6f 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ed190bc44698b591685d8b6feb0da87c5d1629abe304ab17569a3510a1a0fab +oid sha256:d7dff02b9d351d1d8c9461f01f9409fd21e2e118d101112e6e577cdc79664333 size 7117824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo index a24e23ceaddc278d46e695b48c368a46dd615943..1cf74fc5359ca912cd507fca8663c6eb62244525 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e34e480fdf7a4270bca4077b8ded8f08218d466c960d9cd822ee3690dc8e334 +oid sha256:f90ab0964874ee64d7464bea3df754dd92e3a9b03a300d19dc9db7063702dbb1 size 7264840 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ef80f04d6b934748f6cebf91ccb1a2181daf0c39 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a344a8bfb0bd70d150a25d375f0a7ad1432403ec9c9a9455db46debc4a8e84eb +size 727217 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..65b234907da9100129d75b53c6ca7728b91563b3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20e6a3e924fa9631c44d91b12bc5ea4e843680266ab0e765a4989e699c430fea +size 3472384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bd5f3fecb3470b5ec91c8d21b0b956d0c823518a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdacf6d0dbac59c6eb12d348bb6273380fbbc85b63605b48e5bff0b6d951e062 +size 3610166 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9068b179aef4ffa33207+6c043b9a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9068b179aef4ffa33207+6c043b9a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3766f4b13e3c48c89718978c6d8a6066c2c96536 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9068b179aef4ffa33207+6c043b9a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9068b179aef4ffa33207+6c043b9a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9068b179aef4ffa33207+6c043b9a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5fc2ae86290cef43dc29da5a7a8505a62af08f09 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9068b179aef4ffa33207+6c043b9a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ffa25d516e37232d7dd03d3456190d81b046f1be6c8899eb4d314ef378edd56 +size 1171934 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3766f4b13e3c48c89718978c6d8a6066c2c96536 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8e9339ee1f6eca11815d21e9b582bb7ec82e9a77 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b61cf92085320a14cfb797578aa3fb39f5cba66690c0019cba79b75a2b244a32 +size 1165790 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ba1851df5d5295a17fd97aadcfbcb9d5ece70e73 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5a5214416bbbf27c2666b320c779196f07ad6ebfd0ef705474d73bf5b4c3817 +size 49859584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb index af1a1f48a5ef3f69669a415d275780e8bae3db4f..7abae44a2482157ce3494c29fd25be0092c7b214 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:12bcc80b2ad2cb1e12abbca61cd3f324f6f4f564cf78d121d0f5fcd01305c683 +oid sha256:b364b15f88cdba27d3ac4973f7dcfb2cbc49da9abd971bc1f0c52315364d55dc size 451319 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff index a2fab63b246f77f2b81d64bd3aee76b01e3c995e..4690636950b1615801ceb4cf45015cb94597fff5 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bdf04a70eb8bda2f7f4e49bedb2c346136c7e9b0dec32fa7421ea72474563fbe +oid sha256:f173e6c1f419b379767e720c82df3bec9c7942c49ca6ce29905fdea0ee240610 size 2509824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo index 23e1a4ca305899ccce17e373419202f11f100017..136aff59a149d5d7b88632d5f20bf7c40f54fb61 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c84f3a91584fb71a18f3ba16dd9c1cde9917ff28ab72e432ab03a61ad722f8dc +oid sha256:5cde9a5e1a781062038054f87a06889f4898ef6a5b5a50ae52638bd62e04342a size 2583911 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3766f4b13e3c48c89718978c6d8a6066c2c96536 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..68bcac39349f37345b485c8bb4b10fb53f5177ee --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef4be61f6ad2d66bf18356ef6865ff426d5eac08ee5eca52de1f505897657fb8 +size 1169886 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..46a01f8fb83d584f9ef636ae8e2450fe57a5072c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bb59267497550d7dc9c8503a984352bcc190cd8aaf30eda9db529ac7bbe58db +size 70718464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..86a7fa9887e99faab8753db3e6f9236cad66f725 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3449cbcd42919617c2adf34af7dd76f0595291ba320f2b61496393f002b44b77 +size 436655 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ff3e83e9bb06a2b10d89d37e5e2c1dfcfedcd05f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:882eefb45a12f7b3c1be6cc27c0c5bf28531fca29b697a25db83681c5c15293f +size 2929664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6670969157ade3504955605ebbf14e2da0f9fba7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b391249b18930dd3f00be3c319e83afe679e55eab6f6d433358c36091a27b0 +size 3003751 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb index 59a076083253d9de16cfd51e1ee597ae9e8d3b14..8cd95de5df78c2d142b4d504f28394664266cb01 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:726697ae6d69106a35fc6caab871c35bccbf129873b887f75986cfbdd687e625 +oid sha256:19abd02589b7e770cc7d22a41832a1d7c1b18e5db1cdbd0f57e95939951b2311 size 588724 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff index 0c16d5ebde5b7936f39a7e05d141a8d45373657b..8a86e392782a0e17219dcf0fd792c3f56b0c1ad5 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7c258ba5a15e6c435b0719ac65ac7aed62119f5d3d3dc497bc8557452b5afc7 +oid sha256:9e9a16db24435f3344d68819315ac43e7ec035fdf41c68ba517491e1cc7db394 size 1659904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo index fe11e2ff3fdd06413ccf6d281331049dc701a936..2ae85b1e76a78757ae849161d937252b26a08f00 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd30f302fc03f3e2cbf90294931da3ae0f5faa5a5f0cf021340988ef20175088 +oid sha256:ee0ea35ca5a7f2dda442d215bb13c572d23bc55b448a956b0f57522e111c3d11 size 1782293