diff --git a/.gitattributes b/.gitattributes index 06dfc74164a171062b94becacdf6d12e4ce1b7ec..c2c77730633ffec5700b42ecc4d0fb659772316a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -13117,3 +13117,35 @@ neuronxcc-2.21.33363.0+82129205/MODULE_835ff986544e4125ef22+ae6a382b/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_c5d57e15f7d917212c36+690e2d39/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_c5d57e15f7d917212c36+690e2d39/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_e0e9037eb566898d9d22+b6452646/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c33ed737961273b658ed.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c33ed737961273b658ed.json new file mode 100644 index 0000000000000000000000000000000000000000..f3082060ce91ce23a06bf0ddf07647b8f491e402 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c33ed737961273b658ed.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d796a012004414fbb58a.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d796a012004414fbb58a.json new file mode 100644 index 0000000000000000000000000000000000000000..4d5f7ff56040f90da59271617317cc7a539db9f8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d796a012004414fbb58a.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/llamafactory/tiny-random-Llama-3/0f11a8a435b5e1600edf.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/llamafactory/tiny-random-Llama-3/0f11a8a435b5e1600edf.json new file mode 100644 index 0000000000000000000000000000000000000000..2597764a68c0c5a29c2c361e3a27e9a499aef10b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/llamafactory/tiny-random-Llama-3/0f11a8a435b5e1600edf.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/llamafactory/tiny-random-Llama-3/8b1d4dd2776e8a09feb9.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/llamafactory/tiny-random-Llama-3/8b1d4dd2776e8a09feb9.json new file mode 100644 index 0000000000000000000000000000000000000000..ee247c14175ba0c802cc9dbeaede06d0b951c84f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/llamafactory/tiny-random-Llama-3/8b1d4dd2776e8a09feb9.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/llamafactory/tiny-random-Llama-3/8dcf1d5215482c17538e.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/llamafactory/tiny-random-Llama-3/8dcf1d5215482c17538e.json new file mode 100644 index 0000000000000000000000000000000000000000..07e7ca46891bd3bfaca2268e789ec6b225fa7ae8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/llamafactory/tiny-random-Llama-3/8dcf1d5215482c17538e.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/unsloth/Llama-3.2-1B-Instruct/889961c656ed9f2df481.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/unsloth/Llama-3.2-1B-Instruct/889961c656ed9f2df481.json new file mode 100644 index 0000000000000000000000000000000000000000..e5503b04a37b8e94ab2af838d154a023b3d6758b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/unsloth/Llama-3.2-1B-Instruct/889961c656ed9f2df481.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/unsloth/Llama-3.2-1B-Instruct/a5fa387d150c32a09bf1.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/unsloth/Llama-3.2-1B-Instruct/a5fa387d150c32a09bf1.json new file mode 100644 index 0000000000000000000000000000000000000000..3ccba34625c93ad2af9aa96b4da8cf6abdd8f005 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/unsloth/Llama-3.2-1B-Instruct/a5fa387d150c32a09bf1.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/unsloth/Llama-3.2-1B-Instruct/ecdcdf6c21a6dedcc775.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/unsloth/Llama-3.2-1B-Instruct/ecdcdf6c21a6dedcc775.json new file mode 100644 index 0000000000000000000000000000000000000000..fe60d7b982f5cdc56e8bb5e9755c3a92f825fbc3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama/unsloth/Llama-3.2-1B-Instruct/ecdcdf6c21a6dedcc775.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/99aac1ef07573c9c0fa0.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/99aac1ef07573c9c0fa0.json new file mode 100644 index 0000000000000000000000000000000000000000..db4b4c92a0e507743907c167d8bdae59a56115f3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/99aac1ef07573c9c0fa0.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/tiny-random/llama-4/2697e686837ac7a049a8.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/tiny-random/llama-4/2697e686837ac7a049a8.json new file mode 100644 index 0000000000000000000000000000000000000000..284938c46ae1f2405146771d7a9fe2ee4dc9ad71 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/tiny-random/llama-4/2697e686837ac7a049a8.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/mixtral/dacorvo/Mixtral-tiny/1ac71c3e127cabf747eb.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/mixtral/dacorvo/Mixtral-tiny/1ac71c3e127cabf747eb.json new file mode 100644 index 0000000000000000000000000000000000000000..db4c399207b27a4d6139b095c806236d10ef8bb4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/mixtral/dacorvo/Mixtral-tiny/1ac71c3e127cabf747eb.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/b932d23a57d38a832dbf.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/b932d23a57d38a832dbf.json new file mode 100644 index 0000000000000000000000000000000000000000..3617780a88980b55c88ec9092c48e25a543eaa67 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/b932d23a57d38a832dbf.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/phi3/yujiepan/phi-4-tiny-random/203d827373a64513bf73.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/phi3/yujiepan/phi-4-tiny-random/203d827373a64513bf73.json new file mode 100644 index 0000000000000000000000000000000000000000..f5cab858497bc48ab7237f7c1a58def0c45c11d1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/phi3/yujiepan/phi-4-tiny-random/203d827373a64513bf73.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/Qwen/Qwen2.5-0.5B/2e290c11e66bdfb2ac31.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/Qwen/Qwen2.5-0.5B/2e290c11e66bdfb2ac31.json new file mode 100644 index 0000000000000000000000000000000000000000..743e7c87a88d9bd9c10963a2b9d9b85cbea8e191 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/Qwen/Qwen2.5-0.5B/2e290c11e66bdfb2ac31.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/Qwen/Qwen2.5-0.5B/400bf66f1627c17e8f43.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/Qwen/Qwen2.5-0.5B/400bf66f1627c17e8f43.json new file mode 100644 index 0000000000000000000000000000000000000000..29f6dc8f140b4b1cb6077d55141951c22da59b95 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/Qwen/Qwen2.5-0.5B/400bf66f1627c17e8f43.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/Qwen/Qwen2.5-0.5B/637ab279f888857fefa8.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/Qwen/Qwen2.5-0.5B/637ab279f888857fefa8.json new file mode 100644 index 0000000000000000000000000000000000000000..3edcf0e599f408b79f3bcdf427845d140ce19568 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/Qwen/Qwen2.5-0.5B/637ab279f888857fefa8.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/719c46c550ce5344aa92.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/719c46c550ce5344aa92.json new file mode 100644 index 0000000000000000000000000000000000000000..01e0b067555312dba016570ba69d6b0940a9645b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/719c46c550ce5344aa92.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3/Qwen/Qwen3-0.6B/a969d9bef3c5eba091f9.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3/Qwen/Qwen3-0.6B/a969d9bef3c5eba091f9.json new file mode 100644 index 0000000000000000000000000000000000000000..d362e6449cc19e83d3249a1569cde62af1aaa533 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3/Qwen/Qwen3-0.6B/a969d9bef3c5eba091f9.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/0b6a12d7e49a9e84983b.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/0b6a12d7e49a9e84983b.json new file mode 100644 index 0000000000000000000000000000000000000000..adc01ec85dc91f5bb88fc5eafac3c86be41f2047 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/0b6a12d7e49a9e84983b.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/c9589527682241ab340e.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/c9589527682241ab340e.json new file mode 100644 index 0000000000000000000000000000000000000000..bf54bde45119409e2893f167e2e73e43e4483bd1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/c9589527682241ab340e.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/94702f7d3216c1188b70.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/94702f7d3216c1188b70.json new file mode 100644 index 0000000000000000000000000000000000000000..9d1223f8f09c3e03abd2ef02e54be2d8ad698f8d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/94702f7d3216c1188b70.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/476cae34f0e66c7ceb93.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/476cae34f0e66c7ceb93.json new file mode 100644 index 0000000000000000000000000000000000000000..339021ed5db4b9ce3dee3522bbcd8aeb0d6a0f2a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/476cae34f0e66c7ceb93.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/a1a4920382e3aa2af975.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/a1a4920382e3aa2af975.json new file mode 100644 index 0000000000000000000000000000000000000000..c44f3ad3526d368e93db4ef12460b45fdf279cc9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/a1a4920382e3aa2af975.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1153aa191a56690d72ce5cf0bdbfc15a57b70240 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebab4bcb5f71de3a8c7e9ecf957d2d1a47a45c30507a1af8ee396079595387a9 +size 61563 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e4eeafd1e3608f4ddd1f6a76591a4f2d5a1e6b67 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df619b9ece922dd62783e4b0ac5ab759bb549419e1ed33401616afaf9702869 +size 205824 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3dba1fc63124d1d898cafc159905bdeb9a64ffb6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_04355e038292f7e78776+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08038f75d84ba51623775c09b83eb1bf4df5d414b77d197fcaf802a163ec3b72 +size 212110 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892dfe316293590a846a1bc94df5861dc088c087 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_a707756b-d875-4a97-82e9-5e255b49551c/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4398558aeb1304a7410f6b1667177d2d2e59bc4a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebfb122ad0632d5420aa3be8195f9d4167a0a318f701498f3e30d2cf3e545dc1 +size 1584 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..63c33a3cb09bff60420365c823c7ed8ce18c0981 Binary files /dev/null and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_123630f99d5a8c423f9e+0114cea3/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..869d433052d5a8bc45f424987a277fa42958d151 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_140dd47b-ca90-4c2f-b2b0-59cb4872d26d/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6084ce6d845ff91b01c0cbfd758d24019f38ef9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99355de7043422bc03a8df795afecf586999ec744407bc4fbffe63f002ba2df +size 1931 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1f159e6769926b2a8865295c5a13d86e534495f3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:381a5980f8186226b8f50858d7f93b0fa8ded907337d05d6cfb805635f283770 +size 134144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2f97b706f4fef9f3c29da2692afedd3500d7403d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a06020225826dfd3622+5a3c6914/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f8bcd559cf921bedef9ee95c1f34e9e6f3f8f1de916a672197d67205b15d00b +size 136222 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8d8c40b8e864da5ee08d0a9ff1573b721c5848fc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ab37542d3cf3b6441fb14fa043deb6d221000e4cc9894128080364cedc9963 +size 83504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bfe4a0e8b9397c26837dc83099d8bfed37434193 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1ddf9df4b594957856c3+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b77f3686480f7953a4668d9232a48ba38d31481bc8cc4e99a4d4ad88d9cfac7f +size 287744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1670e4fac87bbbb0b5592f9892a22b3e81326668 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_9e36900b-6677-49fa-b1b8-74b27c3c9edf/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d681207f945990d4e21e50b0e6b8e90d019cd7f0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acc559309a214b16f3917edf6efe2b0ab578f048a6524f41ac73e1d625078b3e +size 14480 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9be3ad6f4edc07760598ef4371dcdaa1e7c2ea2d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc3da7e8aa206a32f2dc5fe85b896e9a0faf8dd2fab32ed2619ac7de9302332 +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..016bcf2d239656a6e62509d3d217880a51fe908b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_250c61c7b4514095412f+899b2231/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0011fc1ff22702449123519f9e62ac91a652cc2db05ab3839fdf01ea1c720b3 +size 272962 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1559aeb9d2ea24603a94fbb2895f4a3565a1df19 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_c5c626a0-0b6a-432b-8928-b04b0a469e70/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b7a9bb5ac25aa8a2c03df8d7afef689ae35abb2e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c25a250567c5dc1a4b81193d41e2922b4afbb00cd46a7a7996d97e3a97e6240 +size 103424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8ea8933cee728bd565cd0e9f14f45f3641b5ff5e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15b82211f6c989727c4b0587a944ae8c4ee54695dd0d97d4b0cbd990f99a0406 +size 104320 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3a13fffc6b88e1ec3d7d30c7d8127bb4cd1d1799 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ee9dde7d549ac3fdaa10dca307a1c7c739776b0c9c6ac1b10a319466fe3c12 +size 103921698 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6caf6040c4a42524e60e6178cb4ed6c5535b2bf8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684630f182582053fedae0234b6c417b237c88c377d4a07affedf4ea457a602 +size 33506304 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d2ab0f0142b59549a499fd250f137a5223bd2895 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b8c8ce8bf2645b1a2d4c1e7dde81c2e4b92aa5eddf85da33bdc53680115649 +size 64986 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..80757279bd3edc7f64955ad392140f8e4a698491 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3ae9ea3e6d448b5136e5+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e5e835f471c5a629ce6f9292ef58d7007582c84cf5067cd674fe3c91b38e94 +size 226304 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2ad6da74c67c1105b1f5f8b82aacda1159e9d3aa --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508ad7963ad320e5a005e3ff9f56394d8a7100aed8dab2c468ff1ae33b0582c4 +size 102902845 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c38489d7eacac16b02fbeee06d126e9a9d929ed4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5fe14fef8cb7edef25912269e4fc7a4702ac8b3eeb6438dd7f85c707f971020 +size 6001664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7536f5fd1308dbe976b0a462e568797dff7eef90 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65d9da2e599c7fc0d934bc83fbf9977648655fcf372a3fe50e71b3d5780875da +size 6340945 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..09466421a1bab1b2a1499945cd0ce43e824c0f79 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_affa8051-387b-441e-91e1-d7e5f29f562b/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..93ebb38db07d858ba28f00fe727fb76cd05b2ac8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3af982e3e61b178c6f1cd3437e0605566ec3eb207461226bcef33c605fb8818d +size 29532 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..099d0b163882d2c23bdea9d540bc3d194f9bffcb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4d46fbf701f577181f1abf7a94a447854b4c637641f9651a12b74be76cc734 +size 328704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..67f95ec0d09e3d960203f68c52db623d1de9c7fe --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6675de07fee15731682258b74224c9a4936ea843ecd4ca47d77e02edd0be36e8 +size 334452 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1bad6c386937e7badadc00a4bd6b595534498c80 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5d477f34ae752f5a59c170824b8b4808870f92b137ec5c4cf7cb18299e7a0c7 +size 1574268 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c330921940432db1c9467f30af556229d8c3744a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61419326ab66344bb85210ccd91a2dabc43b1e9d97203480a85ae50ddda77c04 +size 779264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..872c8e814299cffaea5fc3da60e50ce1a65dbd45 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4fdb9fe328288afa6def+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:755d6145b88a566bf58b635ca8a2acd615b4987d1523f3bbc2c377ceac06fc67 +size 787211 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4c22b265836681eb9cc77571efdb14da16e4a64a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aae5258f8b343699ac01839482eeffbd639340a5a5dea0d68c80e021de6d5ff +size 114523 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..af56697d585bb48ef96895b33abea74b12c3c431 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fcae40aaa8dea2f3a38fee9b066368f89aefc6bdbe05a372d391969b8bca126 +size 287744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6936b6ee50cf6dd2de3e63d8ae79e82d3c92d1e1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c1463cd4278394304be16bb5d4847d123c909f228fa0cdea89e327a941b3a71 +size 299255 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..55951f531e4e2845f0ffe73966bfa39ce6eaa123 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb056dfdd8736cf81caaea5a2ff07a2ef8a98f94606a4dd35526570f41c750d0 +size 96936 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a16a99f28ec226cb88b2702546c5f8d80408517d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:637ff255ea65ef5081da4e6f9004895fa625ae862b8a7756f97a8e2b6febceb0 +size 369664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1b6098b16ad76b21c5ec1e1867138a64bbab5367 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf131c43642f157e6bbfa112e521c7db709a716383a86c176ec34c321a62160 +size 588406 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..43218116c78eb2e13bd5cab64b421aebfdec0afc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04bec3da27eeeeb55669dc4fa35c9692d222b7d64bb98ea7931dcb7e4464910 +size 1577984 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..74e5720e3bd55ef83be4ad87557b959fea67194e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_69ef043e398f3de26285+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63bee49454eb5e748181d173ff545a73c2accc06271ca0b7bc0e806c7015b60 +size 1734318 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fc35fa5fc45bb1f2ad3672722556517930e61926 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f08c1b8413ad37d9219be950a5235991173e8a5c8013e32750de62cb1346fb6b +size 593552 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..93517be15b3d90de4ffde9bc40b90de3267380b0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6f533feb49d098cd8319+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d65ffdeca90612534db4aa9b926bba14bc614d9f3106e0684ccfed8f7d9a227 +size 3769344 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ccfd8df1567112b5d3b3ae737d3bc010acf601b0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac09fee9beef4356d5fa3b7db4c5cc47c086e3b3d0c0d3bd4b31eb45baf9c88a +size 82753 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..59e453c6dc3e9460a1a10bccd3c513a7827e317d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80cae2bfc2f488313e13f34a29c65602fcbefdc238f23694a0b5bcbc1af1d422 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3c976f64e365c467935e1776e4da9f80a0d72d3f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_91866ed418823bde5ad8+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:def8afe6cacee56b3c29283920281638bd10aff127c441ae7fb7eacda0856d8c +size 285854 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..06b135557d5c865814e799944a78df2c6a497448 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_f01d4732-5b15-4069-94f7-6be6413e7ca3/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a11cc062167102b0f4faa9fdf16388d72d9e0015 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4590b82eb167fd46963abdbb717cc18cb9511961a3393d100eeacdbdc45b0243 +size 11280 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..943ac3298eaf8e46eb4eee7787d5e005fdf94c23 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7ce849c612a7ed9ea3ac7d64a06917f2c08a43682d4f72d905366ac68abb71 +size 1444864 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..63c95db1cedcd03b9dfa3f5f686e0b29b1fdc1e0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bc9b13dfd0779a230a2b+65e965ef/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2f6164507c66cc56b5530ddfb2863127fe0100587bd87fee4529e3f9acbac0 +size 1447845 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..00d963b0c392d8abeb3150ff16402489b82539f3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb0f3286fa552a5deaedba2813deba9d73bbc5320fd992fe39d99e5f3cca787 +size 490318 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4edf5b819448384a2f9ea038e1e9d29950d39968 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7929c32eafd3b893fe9+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5550253bfa791e4f5e329e17f417c0c8ebef7afeb40e1b887373673569b8a961 +size 27905024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fc1fb914aaf33bfbfbee27ab7855d701fe5e9546 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92def4bd5355f728fc79803e92305cfb7b33bcadb93eb1c52a4ff93e6a75c216 +size 1289358 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8cee9903a8bd03382f9b3ab76a701968fc664a10 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e26c5f9afdbc6d9d4282+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f3b9a30250da9baba26f1a714b3859f100f1fb9dcc7649f26580db9221f3510 +size 728064 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..186af078fde9ce5adb2977d5a60d54ef989c8b47 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_3b92753c-e0ff-4aea-8057-8b2cc32e8e5a/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8496781848c02ecd308b5b2c5a3c342b702425e7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044 +size 8979 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9559c42bdcfb770456ca67dc2ae3062574d492d1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a53da14e04f984cfa1d1934207c8353adc2143392365770f700f869b59257c13 +size 246784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..fb6860dc622072cace0c9f2b613d5a399ee9e09e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e57c3946dfa9c87cb07f+9936de79/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ce6725f9e801fa5163ab54d3124e4a1e82c04a70cb3fab8807b431ef85f54e +size 249608 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ea6d0b2d03e12e6e6709+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ea6d0b2d03e12e6e6709+24129607/model.hlo_module.pb index bfaa128a7cd67b9e8b0b25865e0ffeb41d6ba756..4cc121674e1b04f4d2d99e0aa8b9e8e8c64a0d98 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ea6d0b2d03e12e6e6709+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ea6d0b2d03e12e6e6709+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f14630efdbe3de1251356ca23bccdc7f08e71dd5942b317444ee4f943b12742 +oid sha256:f57a73d4a4b580f6f0395b68546ae8e34540d683ca970228abdad1435325b436 size 726714 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2e699b2b4e59ce7798d214dc19baf75167a0a508 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6bc7e58b8a4764b25b574056cafa952288e53e61f35ed4dab2f593c81fd506 +size 490318 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5d691fd32f6871d0a4139402a31485a6d1b574a0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9cc02790e4e79aee5af+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5680c7be16ac4998a9dad2480d56b28005084ecbb9ff31878565ec7ba8057d13 +size 19364864