diff --git a/.gitattributes b/.gitattributes index ccf3c201d9205ffa419264f7b950592212edfae4..823f4b55a5cfc9fde78b533a49caa11b4812d817 100644 --- a/.gitattributes +++ b/.gitattributes @@ -13017,3 +13017,18 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c83b0bb7257da8a674c+9cb2fe70/model.neff neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c83b0bb7257da8a674c+9cb2fe70/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_b10fffb847021ecf80eb+562c86ea/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_b10fffb847021ecf80eb+562c86ea/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/943a2aeabe8c8143be7f.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/943a2aeabe8c8143be7f.json new file mode 100644 index 0000000000000000000000000000000000000000..c9010280f3947ea86a7d2568e43218786670da65 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/943a2aeabe8c8143be7f.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/llamafactory/tiny-random-Llama-3/1f52dd56ba4b6c4c1f74.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/llamafactory/tiny-random-Llama-3/1f52dd56ba4b6c4c1f74.json new file mode 100644 index 0000000000000000000000000000000000000000..768b9ec9a91bf3c6efcce8fb5cf45724bc0f63fd --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/llamafactory/tiny-random-Llama-3/1f52dd56ba4b6c4c1f74.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/llamafactory/tiny-random-Llama-3/e453ca73065785a3cca2.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/llamafactory/tiny-random-Llama-3/e453ca73065785a3cca2.json new file mode 100644 index 0000000000000000000000000000000000000000..486fad31bbef85aaff9fdb61a8e43967c8fc7397 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/llamafactory/tiny-random-Llama-3/e453ca73065785a3cca2.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/llamafactory/tiny-random-Llama-3/eb5e22c6ea67b0c94017.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/llamafactory/tiny-random-Llama-3/eb5e22c6ea67b0c94017.json new file mode 100644 index 0000000000000000000000000000000000000000..6cca3c1db3d167fef60671b040481aabe3feda68 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/llamafactory/tiny-random-Llama-3/eb5e22c6ea67b0c94017.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/unsloth/Llama-3.2-1B-Instruct/8b999d87128760e77f02.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/unsloth/Llama-3.2-1B-Instruct/8b999d87128760e77f02.json new file mode 100644 index 0000000000000000000000000000000000000000..649d036ac1c5643a8101e3d748089c976e1cef6d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/unsloth/Llama-3.2-1B-Instruct/8b999d87128760e77f02.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/unsloth/Llama-3.2-1B-Instruct/c4f37eab4a64f353cc38.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/unsloth/Llama-3.2-1B-Instruct/c4f37eab4a64f353cc38.json new file mode 100644 index 0000000000000000000000000000000000000000..5531c5f14ef1e4e3a675ee8162a27778db86a1c1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama/unsloth/Llama-3.2-1B-Instruct/c4f37eab4a64f353cc38.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama4_text/tiny-random/llama-4/4a8f7bc879bccb57966e.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama4_text/tiny-random/llama-4/4a8f7bc879bccb57966e.json new file mode 100644 index 0000000000000000000000000000000000000000..25d816e3c249e8ff482634308386663ec35ed299 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama4_text/tiny-random/llama-4/4a8f7bc879bccb57966e.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/mixtral/dacorvo/Mixtral-tiny/48f4a9ff0be5e8420a1e.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/mixtral/dacorvo/Mixtral-tiny/48f4a9ff0be5e8420a1e.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4afd1410182819f211774ad6378adc85e66058 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/mixtral/dacorvo/Mixtral-tiny/48f4a9ff0be5e8420a1e.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/phi3/yujiepan/phi-4-tiny-random/b12b7ef8a6c0d7ba0ec0.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/phi3/yujiepan/phi-4-tiny-random/b12b7ef8a6c0d7ba0ec0.json new file mode 100644 index 0000000000000000000000000000000000000000..7338dba0d6091194605cf410415cf7e9660236d8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/phi3/yujiepan/phi-4-tiny-random/b12b7ef8a6c0d7ba0ec0.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/Qwen/Qwen2.5-0.5B/9e4a267147d9ad7df5ef.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/Qwen/Qwen2.5-0.5B/9e4a267147d9ad7df5ef.json new file mode 100644 index 0000000000000000000000000000000000000000..ef9a820ab41e49f6812307ab056a4e82a9be2e25 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/Qwen/Qwen2.5-0.5B/9e4a267147d9ad7df5ef.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/Qwen/Qwen2.5-0.5B/a00506c78d9c3ead4d9d.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/Qwen/Qwen2.5-0.5B/a00506c78d9c3ead4d9d.json new file mode 100644 index 0000000000000000000000000000000000000000..5ac1f6a35b5d983bf1f49c1fc5c91164c1ac4525 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/Qwen/Qwen2.5-0.5B/a00506c78d9c3ead4d9d.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/Qwen/Qwen2.5-0.5B/b52a2f6aa7b5f8e5bb5a.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/Qwen/Qwen2.5-0.5B/b52a2f6aa7b5f8e5bb5a.json new file mode 100644 index 0000000000000000000000000000000000000000..3840610ba880cb7d90e4ff510d4433d768585bc7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/Qwen/Qwen2.5-0.5B/b52a2f6aa7b5f8e5bb5a.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/3bb0bffc287e06003671.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/3bb0bffc287e06003671.json new file mode 100644 index 0000000000000000000000000000000000000000..2c44d9ae80d3ad1f7485185e652f512acd4c42c5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/3bb0bffc287e06003671.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/bc617080acd95b57b216.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/bc617080acd95b57b216.json new file mode 100644 index 0000000000000000000000000000000000000000..b61f815cb6c9d151c2265390f6ac0b8215d08fd0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/bc617080acd95b57b216.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/bd1f95813da05a700637.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/bd1f95813da05a700637.json new file mode 100644 index 0000000000000000000000000000000000000000..17b8da6216a960708071f550292d46ccc3b8bbbd --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/bd1f95813da05a700637.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/385c39cc1e6ad9337849.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/385c39cc1e6ad9337849.json new file mode 100644 index 0000000000000000000000000000000000000000..06b4273c0e6ccd594b8876179a069e307049e219 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/385c39cc1e6ad9337849.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/1dd41ec7acb6511daeb0.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/1dd41ec7acb6511daeb0.json new file mode 100644 index 0000000000000000000000000000000000000000..75023b1540a6668bca728618be24c6939eb7d8db --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/1dd41ec7acb6511daeb0.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/e0ff29206a1e88178465.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/e0ff29206a1e88178465.json new file mode 100644 index 0000000000000000000000000000000000000000..c74884181054406fd74676eedbd7febbba1d916b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/e0ff29206a1e88178465.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07394cd526dd59fe8a8c04c8e50d77eed8b73ad7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_2b919496-3f5f-4d6a-a1d6-8227f9a1ee4f/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e2473c54cffac949ecf31d3b6ec636c290545b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a29d4cf27aec9af90e53324eff9ee69e033679482f4016a9dd3e5416c7c68a +size 1584 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6b97cf81072a8560df88a7760ef331ab9f42e3d0 Binary files /dev/null and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_039fa65bf5a4ef68f834+08b9b048/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10217061096959125489+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10217061096959125489+e30acd3a/model.neff index 7b2bcf6c7df864f79c1cb3978638a077023a7e53..03536de990f68a2785fccfc6733f2ac88755c4ed 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10217061096959125489+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10217061096959125489+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10244305442015770634+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10244305442015770634+e30acd3a/model.neff index ea713b8a932536476ded5d30704c1d141ef268ff..b5f6e8d2ab4f15f9c78c730773d7fae4ab1c8012 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10244305442015770634+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10244305442015770634+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10606948783918825529+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10606948783918825529+e30acd3a/model.neff index 50cdd679bee9dcfd964d703918bfd05ed51600f4..60b6db97ca33eeb67e0e9d66f95ae93c8ae59aad 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10606948783918825529+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10606948783918825529+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10645643398657092095+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10645643398657092095+e30acd3a/model.neff index b68093462141723b60ef1c4160ee94a11941a930..c3c73b2e47dbdd4614dc2899ddf2120f4b82267d 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10645643398657092095+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10645643398657092095+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10661660426924300837+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10661660426924300837+e30acd3a/model.neff index 9e67848173eeacc9c974875f75274ae392b409ce..e96dfaefc5cdcca1fab99d74f7939da085547bb3 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10661660426924300837+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10661660426924300837+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10746122569655005679+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10746122569655005679+e30acd3a/model.neff index b49a3dac1b468933b100fbce02037a6012c22e4e..80b29430c06f9089f10bec05af3f161b5f9c1300 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10746122569655005679+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10746122569655005679+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11086318750207148626+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11086318750207148626+e30acd3a/model.neff index d4f8c70d93a66c454cf8c90b5c9fe0a6e8300229..52e6bd4e9c0dfd8a1912454e9ac20dc2a726adea 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11086318750207148626+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11086318750207148626+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11909310600244571805+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11909310600244571805+e30acd3a/model.neff index 8a056e51ccd76483ce0591f7bd90d086fb200e5f..a73cd234bfad597c5e56b2b3f047101bf9a1d210 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11909310600244571805+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11909310600244571805+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11933818254123612383+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11933818254123612383+e30acd3a/model.neff index c751b7be665085a4c1783fd4c93ffb1bda1a32ec..2710b158497386a1876b3e59e67e525d5051112c 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11933818254123612383+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11933818254123612383+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1210392327607194823+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1210392327607194823+e30acd3a/model.neff index 16c29975a812015a37af3960ef6363761fb11859..66e07c13e671aa190dbb73f1fcebb80493c00a59 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1210392327607194823+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1210392327607194823+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12487216553200321032+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12487216553200321032+e30acd3a/model.neff index 640395f7726c5b7df1a9f20dceec23f0265fc81a..ddcada1f61cf26aa2406c04df2b1fbfdbd5ee199 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12487216553200321032+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12487216553200321032+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13085549342645515693+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13085549342645515693+e30acd3a/model.neff index 137f238d377198d62d468ddee1f0a4ce70f166d8..e8e0f66170fa4193a88d72ce7960c350a022a45a 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13085549342645515693+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13085549342645515693+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13102010590082783346+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13102010590082783346+e30acd3a/model.neff index 007fd76dd1eafb093326ab982669e0adc2b235bd..b5ca0fb5a4a50815ffa5657f1b11e915202b0b09 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13102010590082783346+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13102010590082783346+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13475273426270778455+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13475273426270778455+e30acd3a/model.neff index 20e7acb252562b27cbfa10029fa1ef8db6d6590b..01e63183e14cd412dfd9aaa18bc506fa4ab8ee41 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13475273426270778455+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13475273426270778455+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13728813963059599796+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13728813963059599796+e30acd3a/model.neff index a19b1f814bc4c941a14e3b9c94910f5bc87e244c..849e00ebeebf488d3a1c80f1ef3c0072bf84da40 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13728813963059599796+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13728813963059599796+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13783872956654054643+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13783872956654054643+e30acd3a/model.neff index 33de6cf2939409a6a11e36561e8a8b1ad1c00c51..4afbd68cc1afc37e15f19d47b2ebab051d9804d0 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13783872956654054643+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13783872956654054643+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14097776130612710282+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14097776130612710282+e30acd3a/model.neff index 5381992bf02a9a68f90d3badd10ed7d3cc97d0ab..c0c3be64228f5c0a1239d04c7e1d461f83aaad3f 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14097776130612710282+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14097776130612710282+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14155105943057125178+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14155105943057125178+e30acd3a/model.neff index a11ef5a064cceb7076a100f3003eefe01efee7a7..fd915d276d234ddd6d406ab4b67119d6efab519d 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14155105943057125178+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14155105943057125178+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14173682194645188821+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14173682194645188821+e30acd3a/model.neff index 136617c9441e843ac89ce7a8a405a7f44072d0c8..7681df19c80c1066beafc49ff082b868956ac1ca 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14173682194645188821+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14173682194645188821+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14274833313744358348+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14274833313744358348+e30acd3a/model.neff index 7ac113c9c3a87ca6342504232463fe66b9d1d8eb..227060944ade4d70618c794735fc00e619a7ef99 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14274833313744358348+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14274833313744358348+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14469716438804935215+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14469716438804935215+e30acd3a/model.neff index 561b5ef09c775ec8fd547dce7f60b7cbd33fd0b2..de931dffa86664b010ccd6a7c470b3d907537a79 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14469716438804935215+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14469716438804935215+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14739672283771660808+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14739672283771660808+e30acd3a/model.neff index cf7b80e6e3f8152f1378a577dc0a25cfc852a8fb..ba28f880e8d29bee7120b8fd02592849371784ef 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14739672283771660808+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14739672283771660808+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15442663025941492357+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15442663025941492357+e30acd3a/model.neff index d79353d07bef4102b7d790db5950c9668390a3b5..551180cc7d2131eb4eefbbfbbef99f63c219fd64 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15442663025941492357+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15442663025941492357+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15557971880365771457+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15557971880365771457+e30acd3a/model.neff index 79ef9110baa743d589fc832576307765a56092e7..e37bc5883888a130f4b72ecadf232fdaef24602a 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15557971880365771457+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15557971880365771457+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15564425602929126510+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15564425602929126510+e30acd3a/model.neff index fc2263f08d43bd3e276aab4580738e4da4e49756..94465d32845ff6bba4a148b85fad0da5df095ba1 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15564425602929126510+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15564425602929126510+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15589838460896944293+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15589838460896944293+e30acd3a/model.neff index 5a1d2e47ae8b0907f1882d0c6f3f1b845cd80e70..a160a014eed45871118c4e0a17d50eb4ace109b2 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15589838460896944293+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15589838460896944293+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15654572849171857535+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15654572849171857535+e30acd3a/model.neff index 246c5578b7020010f6279e89dbf7ff3bcfe1532d..b845d1bc5005c79ecaa86f9fbcb20b571ae64b20 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15654572849171857535+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15654572849171857535+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15767383571209512795+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15767383571209512795+e30acd3a/model.neff index 287ad97521ccd12e3c2b5ab1283a111dfaf056b9..4e4d7dd723cbcd4087e27939dfb8e99609cf0155 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15767383571209512795+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15767383571209512795+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15888836045088309511+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15888836045088309511+e30acd3a/model.neff index 1641ca8fce6ba1ad305d28bb37a535277ba3b95f..e629bb48fdbc91cc8c9625aa4d865b330136f264 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15888836045088309511+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15888836045088309511+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15929451261464042997+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15929451261464042997+e30acd3a/model.neff index 172e79021c6df5ab4e2dcfc7e4f3177b75043717..2e07189a79859ea95ce37ff7036a0ae80b9edf4e 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15929451261464042997+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15929451261464042997+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16016505958416521648+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16016505958416521648+e30acd3a/model.neff index fc3ccdac010eef4f52fce3e93e85ecefdca74766..df88950be8d96aa48568069497bf18e069704c51 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16016505958416521648+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16016505958416521648+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16053163479112702088+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16053163479112702088+e30acd3a/model.neff index 8c01a4e746fffc1f78322f00f3d28ad4e6d15d2e..4179b1091210132c61c11d14bd022303201166ed 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16053163479112702088+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16053163479112702088+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16204387852795926216+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16204387852795926216+e30acd3a/model.neff index 46f1fc54b5b27deaf730af72d675bdfebf3f6196..283e76c7c4fa3fb6a3cf1b53919fc0857dee5012 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16204387852795926216+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16204387852795926216+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1656719109221189948+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1656719109221189948+e30acd3a/model.neff index 358ebd39141684a19a05ae73e044a009fd7cc3d7..3c6076884b472158602cc4392927cec2706eba06 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1656719109221189948+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1656719109221189948+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16599571375348449904+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16599571375348449904+e30acd3a/model.neff index 3423d422fda4d80833a4a3c018241fe3702c8020..2b6b56c1ce753d4764aae16ee5e2043320bc52ad 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16599571375348449904+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16599571375348449904+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1663757798483801648+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1663757798483801648+e30acd3a/model.neff index c66376cbe1bac36cc26074763d328cad3882efda..5db80896f0770c0bc24adb8774a54b99b73320fd 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1663757798483801648+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1663757798483801648+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16738296820980389103+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16738296820980389103+e30acd3a/model.neff index 900a8b1a1e205a493dec97f26f3c955ef196cb28..2dd904ca465b5699dc4369cb73f543a13bfbd8a2 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16738296820980389103+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16738296820980389103+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1696825468766062114+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1696825468766062114+e30acd3a/model.neff index 53e38b9be0bc1ef81d448ac78159a4ecce503eba..4d623402f980cd6a008f493f3096c99a39ab5b90 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1696825468766062114+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1696825468766062114+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17476209562158013765+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17476209562158013765+e30acd3a/model.neff index 6d7d825f678c184959a431f7ff0051dadc0b5b2e..c5aa6dfdc3827ea428d58e24a234dfb19bb1aab7 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17476209562158013765+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17476209562158013765+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17781596253725927902+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17781596253725927902+e30acd3a/model.neff index fa6f834088ac9d45538cb87752c9b49ed4815223..f1d10af359755500d982708054cea4bb7ff46fe4 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17781596253725927902+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17781596253725927902+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1795740353831178306+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1795740353831178306+e30acd3a/model.neff index e50f394c88596bb9e303a15a28dc2f3084d98e93..d33fac4230dbfa9c1925f78486d43bd0e6cba7ad 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1795740353831178306+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1795740353831178306+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1800832390737682969+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1800832390737682969+e30acd3a/model.neff index 754b74c6ed32ac4ea3f29d0d8d99d74ea5e988f5..afea6f10a8b2af1a6bb96bca24df6ef85ae9b705 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1800832390737682969+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1800832390737682969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1805481651134498710+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1805481651134498710+e30acd3a/model.neff index d8be41dac2fe0e677a75fc94741c2b18e5f1da31..55131b112ffe6ddb51b14482629f278aa21895b0 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1805481651134498710+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1805481651134498710+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1805728312057401221+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1805728312057401221+e30acd3a/model.neff index 7f7fa7da406780b14f479333a0875c8dd6a62e56..8a538897b3b9ab82363a9892a7b2c91e1b7d4eb0 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1805728312057401221+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1805728312057401221+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18070749384531238695+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18070749384531238695+e30acd3a/model.neff index 12715b0b3ee92c7244e772ef2b75a90ac787d98b..1b99fb87a79f22a9e08be181c152a3b8b49b8e9b 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18070749384531238695+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18070749384531238695+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18134517905922687964+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18134517905922687964+e30acd3a/model.neff index 7b20bf6d17481d196c05a2050be63d690da15c29..2db83a50b00893d5a8abaf5c85fb0b4a27cd0190 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18134517905922687964+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18134517905922687964+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18207475553554060390+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18207475553554060390+e30acd3a/model.neff index 42002b8ca11992f75158d83c1d02f546a7926526..119451a35e69e93abd2abe6f1bc75742e3b36809 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18207475553554060390+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_18207475553554060390+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2157613318347839507+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2157613318347839507+e30acd3a/model.neff index aaa7ef7ed9455acc5c0f2e9e5bcce56464db758d..2f2ff4ff7ca1d47eac8d6c17b68f6f7682f71dd1 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2157613318347839507+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2157613318347839507+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2203200257461345827+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2203200257461345827+e30acd3a/model.neff index c15584978ba3c78afc3dc47ce5e8f3ecaed313ab..98e6bbd3fa43f194582e6c8df1638c3f7e732450 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2203200257461345827+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2203200257461345827+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2410426589418994260+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2410426589418994260+e30acd3a/model.neff index 2acf53cffa409bcff262e8965dd74f0c9ad97a73..327c2b6a3e2de582c5a6e7c689fc9de638a0aed2 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2410426589418994260+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2410426589418994260+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2426414314187505427+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2426414314187505427+e30acd3a/model.neff index 88c03b53c03dcd36b7cc87b76008567628e80bbd..9a879f95868ce4405d2d9d45da8033343bf075dc 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2426414314187505427+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2426414314187505427+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2576759111807165188+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2576759111807165188+e30acd3a/model.neff index e1f4b792f8d47e4d1ede83fdbaea211399210554..54f01070dc6e006861577bbe71a773979cb8b3b0 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2576759111807165188+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2576759111807165188+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2650447136112456251+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2650447136112456251+e30acd3a/model.neff index 8d6d2bfddc1291e2ab3b8c4d3af48dc6366afda5..e62a4ac4c4bf2010dd3c472a8d0a3bc290bc9cb1 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2650447136112456251+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2650447136112456251+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2682953264353234433+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2682953264353234433+e30acd3a/model.neff index 5ace95122ce130544c851c3aefe19248720deab7..698f437dd684047182dcdac6be588914c1d1badb 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2682953264353234433+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2682953264353234433+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2699280712748688265+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2699280712748688265+e30acd3a/model.neff index 7f3a9205c45a940e524af9ec57c3928f4b95deee..4c8bd9a6c3908501802d9e8169aec79fe025ba7c 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2699280712748688265+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2699280712748688265+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2811495285170804454+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2811495285170804454+e30acd3a/model.neff index 2e2dea609edc7df2390cda740a9ffc74a2f73ce6..8327648736b96c4a48897d61a7232144b3703683 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2811495285170804454+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2811495285170804454+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..366a3fc31275a70e9f93a3b0a39c916ae2ef20bd --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_92a0c38f-0e81-4bfe-aed5-97c88e70578e/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fc98e7bf529e0dd331b0a2aff840a3e9c26ac362 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b92ea64461d3915b88e517abee15b19611092f0a9fed7a917ed85bc98f255e4 +size 103424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e6180b0c4e09e3f3b51a6badf5f28cd79e4ed6ca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_285442f0c8e665e1c464+fdadbfb3/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b5f9ebfe2d5fae1c52b833d666cbfffce14d61fa9daea380c134a905882466 +size 104320 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef1d5df45a533ea1efaa92ca1f6251404fd66b42 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_5c5f1cb0-4a1a-41b3-8b9e-2027af83d1fd/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6084ce6d845ff91b01c0cbfd758d24019f38ef9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99355de7043422bc03a8df795afecf586999ec744407bc4fbffe63f002ba2df +size 1931 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d2f11b499770542a91aff97ed0a935015f5c4ef6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35385d123867bfafaa881de3234d315967a002d1266ff581d8eadea98cc23450 +size 134144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0f1e1b1148d2c429e105e5f3f4baca745891cbd4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a2a38ff534d69dee9d8+1a5ed1fb/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:160af5d9de5a5cb353d28f451913d7b3dee6ff2ba501fce39119eb98c78082c5 +size 136222 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..d4ce4928b7a12f18738df44fcb54168c52844a43 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_d1d0254e-e6b0-487e-a5cf-9e8aeccd8020/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8496781848c02ecd308b5b2c5a3c342b702425e7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044 +size 8979 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7f29d45c649162f4361537758e18c8345892a8a9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb4121c000a8924beddfe7cfbfd91c8a4a1336d0f79b91228a2e3c008a86e1c +size 246784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6d97ac7619510b3040036dc8f069d958ba507435 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d53e7d37242178f5553+384bca3f/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71aafc3a75df30de1f7e130894b98b28f342d885a527d352b4ed1e5f622e2d0 +size 249608 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_302950992539679003+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_302950992539679003+e30acd3a/model.neff index 5ccb0e8c9ca2b44e1bccfe83771e5bed29fb9aaa..21ed72a66f5b48e0f4432e02bc2914a6c7e9130f 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_302950992539679003+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_302950992539679003+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4013984518400530928+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4013984518400530928+e30acd3a/model.neff index 4bb5c418fbb4a72156b0a2b24f9e031821b49a90..3fe6c0928849dba1c603c40aa6cd5cb0a6313adc 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4013984518400530928+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4013984518400530928+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4052264821257342969+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4052264821257342969+e30acd3a/model.neff index 7509d2eb48bfb8955332a9382f1ba2631d449592..f34dec436f7800a0f5961b1f69b4f5ca22dd4a26 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4052264821257342969+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4052264821257342969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4166695227062860792+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4166695227062860792+e30acd3a/model.neff index 67b3cf32af9f4db8198e6b9c2e283b20e8048402..bd48af78d92ff63fa0401aedb64520bf652b2a2b 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4166695227062860792+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4166695227062860792+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4232557591140652245+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4232557591140652245+e30acd3a/model.neff index 7a6781a8571fee1700d5f3f6979f9781016ecf18..ac326e5e271dbac43e2bec7e7ff00aa197206abb 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4232557591140652245+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4232557591140652245+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4606491920565381523+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4606491920565381523+e30acd3a/model.neff index 691dce3f188c046942bfa11e886f66936df132a8..7d5b431c17635262367d947c9579d66d35dc6dfd 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4606491920565381523+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4606491920565381523+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4608333438381222049+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4608333438381222049+e30acd3a/model.neff index 2c41facfc266993821c6ffefe118b70b30fc42ef..aa1ac9c09cbbded34e5fdc1f1a77e41191a4c181 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4608333438381222049+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4608333438381222049+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4683016291147007110+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4683016291147007110+e30acd3a/model.neff index b7e1245abbd0672fbbd6c85034687c60c686edc3..b09a0c900da4994019391369ccd90559103c3350 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4683016291147007110+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4683016291147007110+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4816422433712790690+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4816422433712790690+e30acd3a/model.neff index dedd3c42f30a0c52793ba676f5ebd9bc5c833c91..cc75d90a2f95c55963ba3eba985f2989725937f3 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4816422433712790690+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4816422433712790690+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4842952601010372434+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4842952601010372434+e30acd3a/model.neff index b33283f090d5255d422b72b9420b2b4abfbcf2f0..674a52f26bc24ba7e4bf5ea8fdf0154bb3681118 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4842952601010372434+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4842952601010372434+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4939721357859779936+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4939721357859779936+e30acd3a/model.neff index 8e40efb78b7ad24d1d37a8f5affc97beef79f4a8..e05bb32af9dabd4394722a110657a80b583e5c48 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4939721357859779936+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4939721357859779936+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5397223456911199516+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5397223456911199516+e30acd3a/model.neff index fb601867e0f5e1e6286dc0bf55d81158d754c8ff..c24e277446c0e66b60c8b5d3b6c674a9cce9b1ec 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5397223456911199516+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5397223456911199516+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5724544066416780383+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5724544066416780383+e30acd3a/model.neff index b57c0758aee237ecd8e7134f72a5e23bf4328c00..2659632f1f051205566aabcf5510094cdbd49cea 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5724544066416780383+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5724544066416780383+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5765916484866650909+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5765916484866650909+e30acd3a/model.neff index 8c29b77be53c9cf359f69c2ab3eb94285c71e2d3..8125ffcd04887137c78aaede891ac76b71659401 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5765916484866650909+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5765916484866650909+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6368409506294683105+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6368409506294683105+e30acd3a/model.neff index 8f85df150b5e136860c157498550028d25796f54..08274bf0e4fd4cde4856bf94cfe2036704a04c4d 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6368409506294683105+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6368409506294683105+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6375780938374445148+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6375780938374445148+e30acd3a/model.neff index 133478470a8d4736947eeb90665af182c1924d06..d0d6f3044424f44b5c62017c90360bad7e5eb384 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6375780938374445148+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6375780938374445148+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6417297157123190451+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6417297157123190451+e30acd3a/model.neff index 57a89dbaafe564b3c56e680f70ad92e31f2ab5b8..e3cbd52bec89c8f64003715ac4529aeaabfe9aae 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6417297157123190451+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6417297157123190451+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_665145310648895012+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_665145310648895012+e30acd3a/model.neff index bcff31cd8dc74d1c85ed5b8b4cf269c75e5a61a1..6cd5108f3f4ba3d170f4e4c33b49c8463982ee87 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_665145310648895012+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_665145310648895012+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6711765444274762085+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6711765444274762085+e30acd3a/model.neff index 9ec3a132e00288f51bbd27effff7eb871f8348ee..92eb3d4260fbac4ebda5233df9021e326bc285e6 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6711765444274762085+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6711765444274762085+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7049343467335223052+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7049343467335223052+e30acd3a/model.neff index cf0f68b28963046c4a778a65dc2ac4d2a3c7d151..8dcade0d918e258a63d13c3e2e5818b77eb1b8b3 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7049343467335223052+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7049343467335223052+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7877866862268499659+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7877866862268499659+e30acd3a/model.neff index 3115cb105d39726ce048160ff5a6bbb74e32cede..a1ad3ff74fcdfef2948afe07462cfc873fe1f2b3 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7877866862268499659+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7877866862268499659+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793722132614349680+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793722132614349680+e30acd3a/model.neff index d7fb62aa9b67672dd693c424cee7a89a982fc99f..368c7389f806908676fd9aa37be9d6a06a11aaa4 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793722132614349680+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793722132614349680+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8d3a7665333f378c4edc34f18608b07f5b4bd52a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_e00680bb-af3c-4378-adaf-918ffacd4170/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be5670c6258a72b3253c262c04b0a3c084a1810d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc99fd8fa13fb76a08ae04c8275958cd87878cfbf86f63ae613d7efcf775bfc +size 29412 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4ceae3a3d5038e070ce54a22c6e29bd1b40dbe23 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea2b07bdb79531fa84330f50ca44590ad7bf6f1c819eba966e3c7f81600e39b3 +size 328704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..979a3abcaaa1982b7634b0edafb76badc53d09fe --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7e6ec8464ad7c3c6284f+b91a68d9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5448f6feb1d168f185f31513355c294e893a0fd87e9655da0641f3807e33846 +size 334452 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8142158350842512240+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8142158350842512240+e30acd3a/model.neff index 1ea1e2e21bf26ee3d02f36c1a112f23d25367faa..39454cf6b8b1bdf22f4f6410ac624223afa80023 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8142158350842512240+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8142158350842512240+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8146958338478347620+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8146958338478347620+e30acd3a/model.neff index eff06ee3871a4e5a28e86e4669fe2e694fa7fac2..3a97b6ad3f8105c3858c79f6f5d35b8aaf3bc5f8 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8146958338478347620+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8146958338478347620+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8245165830758578911+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8245165830758578911+e30acd3a/model.neff index 3d0ec9af968aefec0680da670a4b18d6ebca4cfd..44bd5180c86469dad411ec8e6f5f95f8565a9295 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8245165830758578911+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8245165830758578911+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8679096817551502409+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8679096817551502409+e30acd3a/model.neff index 034966d2e98c77eed60de3a9adf7ba623dd4627a..75e64a7f510cfc9639b48a47666fb96595aa5bba 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8679096817551502409+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8679096817551502409+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8882592820567938515+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8882592820567938515+e30acd3a/model.neff index 88e26fbc6dd499e4d298e1181fea5293302d6e66..132b07152656d1269b30744fc591416c5df0bf02 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8882592820567938515+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8882592820567938515+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9168843780052024308+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9168843780052024308+e30acd3a/model.neff index 837a3fe90b2caf9cf572704bfb0d0f0bd28f87db..af28f89584b80de163eea69a5a62a0e4a3d046c3 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9168843780052024308+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9168843780052024308+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9527558113976496538+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9527558113976496538+e30acd3a/model.neff index 0e7512e9e58aad8cb7068e1e7645d887876776a3..aa3fcaedf349c7a761aac89211e7259c54f624a3 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9527558113976496538+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9527558113976496538+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9533302140457976101+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9533302140457976101+e30acd3a/model.neff index 2c43bacdb7ce2298a8ad0c560305ef8bca87c2f1..b9b8a8614a91dc01405863093fd9c3da0009d501 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9533302140457976101+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9533302140457976101+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9770544877851564228+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9770544877851564228+e30acd3a/model.neff index f648f95177a9f42d6754642c7455a02ef0adf085..e4f585ffc0bdec82ba8875500536d6824abe4bfc 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9770544877851564228+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9770544877851564228+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5d9b97cc539d25846c2e7f9a8fecfd9632ce845b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_72f58ce5-2f6b-4300-86fa-e590856c0954/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a11cc062167102b0f4faa9fdf16388d72d9e0015 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4590b82eb167fd46963abdbb717cc18cb9511961a3393d100eeacdbdc45b0243 +size 11280 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..99713acdb70003752b0e7a6755148411704bbe08 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b89bce2852ae651221b582824844bf6ec18c58984a0eea6a0f081b18b43fba +size 1444864 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4ac8a289e97e4f802c99925988b2a6243c188857 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aaa9f7d6fac20c922790+e4ec13ff/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e76734b9abf1e173a6275a3b8a688053098a3ca99d7e0732b5b57898a782bea +size 1447845 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b5354b7b5a65a43ef1260aa0e8de13980762d080 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649ff83e0cbee13abda84e02c1a114e8b60d780456161e42f68c99c57744691c +size 739558 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aec14dd461e86ffa54174ec02498afce7b036555 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3cfe28275e1a02a015c+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f71df2ef73789e25f4514066e33be9bb355e9cd3373fc5874333f64d9b8e58ae +size 26133504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9c5a330b9f7fdba6bb4c837889e33f6c9e6384d0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c2920e5fde77a80d2d66c9ba5addf46e53d032f533c7097cba34e31243f84ed +size 588406 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7895a1535e17b68134cb8cf099641cb52c31e624 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678ec457ae2bad234a72708e006fcf0d66f32e207ee59bfa7791491109372c9d +size 1926144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ab543500356f242d56f407ac9afc6ffcf146ea3c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4df6648fd68fe444e68+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab18fc1141cf73daac1b35117b41bb341b2c594d66b4a005af15167b73c54c1a +size 2082478 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..57fc78c1b7c72fde4dc40aaf45321916d361a475 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_79ab74f4-7169-49f0-8f5c-8c466a66af5a/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07bf62460bc7813e4a95f208598e2fa4ae551d41 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76dc525043c8d0fc6b46382b17659e6a2f2e981788c635021a8a61aa832d2866 +size 14480 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..20ceb9294fccbca531097c8880b4bd130e1681a9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a67615099822707e3bec35758e5e45c30a125c41816862be3a6284848d61039 +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a3f3a13266760fe46e8d6f2902d72cda39b9ae4e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5115466754a2222278e+246d8cec/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f3b776f6ec0a44e3cb6bd450f4258f2c947cf64c5be269bea47fbac265c187 +size 272962