diff --git a/.gitattributes b/.gitattributes index 3ca452e6b84a06a3e9a1c2344f32338b46f3f3d0..a83ae8d163216de91bf14240a1fb31e8f3bfccc0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5527,3 +5527,36 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.neff neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/0199e8ce7d3dfb946b04.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/0199e8ce7d3dfb946b04.json new file mode 100644 index 0000000000000000000000000000000000000000..18f90a7c4a9ae6f2edbbf92542c9d7e8721a0710 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/0199e8ce7d3dfb946b04.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/57f1cbf66f8cce26a28c.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/57f1cbf66f8cce26a28c.json new file mode 100644 index 0000000000000000000000000000000000000000..69744ec8f8e30c97a10c0a3b59cd3337a34a1878 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/57f1cbf66f8cce26a28c.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/703a7072b170148b97b6.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/703a7072b170148b97b6.json new file mode 100644 index 0000000000000000000000000000000000000000..9e00710fefe052ee46df3a2ab5db9ea1704c4983 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/703a7072b170148b97b6.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/708d4f031d164c862b46.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/708d4f031d164c862b46.json new file mode 100644 index 0000000000000000000000000000000000000000..c3d21a84d80a741a283acb5b441db54adb74e925 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/708d4f031d164c862b46.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/c20327a4effcee88b4bc.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/c20327a4effcee88b4bc.json new file mode 100644 index 0000000000000000000000000000000000000000..bd910a048d9df65586a0c18442929de7ab33d600 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/c20327a4effcee88b4bc.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/02d0a60d8a2b9329cad1.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/02d0a60d8a2b9329cad1.json new file mode 100644 index 0000000000000000000000000000000000000000..fa775f740f1f4d63c4d43d7825044f613a96fd39 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/02d0a60d8a2b9329cad1.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", + "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/105d8a3d06237ca2d1ff.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/105d8a3d06237ca2d1ff.json new file mode 100644 index 0000000000000000000000000000000000000000..cfd4ec7091a7dc41959a9ad3fd0123e6e56e1380 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/105d8a3d06237ca2d1ff.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", + "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/110b833c3035ce194ed5.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/110b833c3035ce194ed5.json new file mode 100644 index 0000000000000000000000000000000000000000..a9f2095ede8770b97de4bb8e239241cce76ca557 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/110b833c3035ce194ed5.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", + "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/206662284977c7458a21.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/206662284977c7458a21.json new file mode 100644 index 0000000000000000000000000000000000000000..d908da752e559499825ed57d3b672acb0ee02345 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/206662284977c7458a21.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", + "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/a2d3fdcb2fe5b2d84e1d.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/a2d3fdcb2fe5b2d84e1d.json new file mode 100644 index 0000000000000000000000000000000000000000..5cf39b373c23a0587ba13227edd00af562abc227 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/a2d3fdcb2fe5b2d84e1d.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct", + "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/63e1314219a229b693b7.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/63e1314219a229b693b7.json new file mode 100644 index 0000000000000000000000000000000000000000..4642acec2779921f1e20231c641f5f6cd14c8615 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/63e1314219a229b693b7.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.3-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct", + "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49159 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/b183c08457fabacfd307.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/b183c08457fabacfd307.json new file mode 100644 index 0000000000000000000000000000000000000000..63b74f9b8a5cf24cb311690a6d34984ca87430d9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/b183c08457fabacfd307.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.3-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct", + "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49159 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/c568be536e0d41423ee2.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/c568be536e0d41423ee2.json new file mode 100644 index 0000000000000000000000000000000000000000..6a7046fed7b4821d2a51b6ec2e286f0ff884fd53 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/c568be536e0d41423ee2.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.3-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct", + "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49159 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/e22e76b093ae9ec91f61.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/e22e76b093ae9ec91f61.json new file mode 100644 index 0000000000000000000000000000000000000000..d3139015e2eda01987139b2c487ee86c6e0c29f5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/e22e76b093ae9ec91f61.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.3-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct", + "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49159 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/b526548d5134fc230616.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/b526548d5134fc230616.json new file mode 100644 index 0000000000000000000000000000000000000000..4e712b4a22a4aee80b602e40d9ad28b560c593f8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/b526548d5134fc230616.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/93bc6f4b62a5e89361f7.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/93bc6f4b62a5e89361f7.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec949c58737b24d3f6135b8e329bf63e9bdda64 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/93bc6f4b62a5e89361f7.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.3-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct", + "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49159 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/afa97256ccb78518bca2.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/afa97256ccb78518bca2.json new file mode 100644 index 0000000000000000000000000000000000000000..5be61d0b657ad8d9ab0b80ad24780cffacab2914 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/afa97256ccb78518bca2.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.3-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 0.0078125, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct", + "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49159 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/0105f379a23b1ef1189f.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/0105f379a23b1ef1189f.json new file mode 100644 index 0000000000000000000000000000000000000000..1fb53e97ea1db878f49ac754d68e16c515358080 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/0105f379a23b1ef1189f.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 1, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/5ff4a6b24814913a6853.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/5ff4a6b24814913a6853.json new file mode 100644 index 0000000000000000000000000000000000000000..a29181e3f7638347c6e36712581bee37aa1d6faa --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/5ff4a6b24814913a6853.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 2, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/99aac1ef07573c9c0fa0.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/99aac1ef07573c9c0fa0.json new file mode 100644 index 0000000000000000000000000000000000000000..db4b4c92a0e507743907c167d8bdae59a56115f3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/99aac1ef07573c9c0fa0.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a57e9d7129cf67912a79bb197b2f281e3fce6e59 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e4c741fe5e0f5b099d40b9978c367531955572f5eba53147210debe29f33a26 +size 1367989 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6bf8ab2636288069ffe55e8f73646d6ccb467157 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c0e63b1bec594156292e975af5ebac2aa3bb0d8c959215f7db5c093a259807e +size 4363264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b0f0dc334ff0a417ec8ee679cacd2dbb7ace88ee --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00f129e0558f3266c48325433d7aa8aa9b924e2d1f17314cb8cdba266262ed49 +size 4546847 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..48491ce2440fe6e652b6465fd499b0b7f29ded7f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65796c06ab7af92ba6a9644f2a7c7c097975c802b5cedd1e47a90662ab848112 +size 912376 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f689f904a633d3138f70515ba7bf10e66eec9ac4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5eb5eae4c845851230e3741e6fc70cff56ba8b46ba4d31250494828d4a27ac1 +size 2059264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6b29e54f0b6f0cead363b482b9dd60995ea18fb0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd067bb8e7f5373540ba520c274ee981f02b303c76853620a457e0545ac079a +size 1023328 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..df8f1e04ec7dd015e82ff1ed3c6212e2876b1d2f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38267b698da6fcc510460c0e5b4e0f7ee008df2357326e82aad15c83b85edf84 +size 6329344 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ad843333a219c04938934deefbb41f0a64c631c8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7fb8a1567382a6d23c42f1d970257f9116648f1bc3404e556e772f97c1f482a +size 6513830 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..78e5a75d7f38b908f40f7d9a6f442dfa7c809d9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5dd1e88dcbe6ba0ab874f03f2b1d4744e635d4846a2aac3c8e710312fdfcd0 +size 406532870 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0ea0adcc57d30c53f3d5efd8aa1200195d151d2d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92be29e4a8951ff436d82973d192d2ec52cce53a81a90c78ce3a361cb44091d +size 142459904 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e29f34b619d522038c69e8311bd5d7d74103fda3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f01613d0337aeb0f0977a78b99e1c0cee9913120fd05928157a424acb5f406 +size 102782272 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..92b1e6bfc1ac7dec5fea553d256b27754f6dab57 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38aaaaff890bd48bf0b499b1329062e1f3b38252834fcef1dabdb86d716d92d1 +size 7732224 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..219f7aa8e8f7633b52d06b39d915631989cfd497 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66883cb96f56ad390822ff24b144b0714b2f91b0f4ca936d0f79de494d245975 +size 8071521 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3a13fffc6b88e1ec3d7d30c7d8127bb4cd1d1799 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ee9dde7d549ac3fdaa10dca307a1c7c739776b0c9c6ac1b10a319466fe3c12 +size 103921698 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6caf6040c4a42524e60e6178cb4ed6c5535b2bf8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684630f182582053fedae0234b6c417b237c88c377d4a07affedf4ea457a602 +size 33506304 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2ad6da74c67c1105b1f5f8b82aacda1159e9d3aa --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508ad7963ad320e5a005e3ff9f56394d8a7100aed8dab2c468ff1ae33b0582c4 +size 102902845 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c38489d7eacac16b02fbeee06d126e9a9d929ed4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5fe14fef8cb7edef25912269e4fc7a4702ac8b3eeb6438dd7f85c707f971020 +size 6001664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7536f5fd1308dbe976b0a462e568797dff7eef90 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65d9da2e599c7fc0d934bc83fbf9977648655fcf372a3fe50e71b3d5780875da +size 6340945 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d058e49ab1738066e3efc097380ede9f5072a267 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b7eb966ef00091f9d402cc0bf3be21635e3c872be5c29fa8b462412f8854de1 +size 1020517 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..da49ff0a8e92db85bb44c62983c5990f1d928f63 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:596cc9acd40beed4e3862024eb24acc217aef6cf9c2808fc37b379642e5e2275 +size 4260864 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..73d8eac9cbf9d3f22c6e6c8e31f6269c7b88bd3b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bedf57be10368e3ddc71e80248bf774c01c3bc7141443c53e5847f009322e662 +size 4444786 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4c22b265836681eb9cc77571efdb14da16e4a64a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aae5258f8b343699ac01839482eeffbd639340a5a5dea0d68c80e021de6d5ff +size 114523 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..af56697d585bb48ef96895b33abea74b12c3c431 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fcae40aaa8dea2f3a38fee9b066368f89aefc6bdbe05a372d391969b8bca126 +size 287744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6936b6ee50cf6dd2de3e63d8ae79e82d3c92d1e1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c1463cd4278394304be16bb5d4847d123c909f228fa0cdea89e327a941b3a71 +size 299255 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..55951f531e4e2845f0ffe73966bfa39ce6eaa123 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb056dfdd8736cf81caaea5a2ff07a2ef8a98f94606a4dd35526570f41c750d0 +size 96936 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a16a99f28ec226cb88b2702546c5f8d80408517d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:637ff255ea65ef5081da4e6f9004895fa625ae862b8a7756f97a8e2b6febceb0 +size 369664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9226044d33b24e38ddf1557601990ed4797fdb34 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b7ede6226f0b98d255d283aa3be44c8a32064ca6e73716556ca5b9c6ee85e6d +size 1258183 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1a9d6f8fc67c65034d59ea9b635ff9e90f932082 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb1c23f6a4c532f542c13d611f8e35e06a56495455f2162a6d912945b7f52f35 +size 2356224 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d8b19f0048a569770413ab3815e0bc232b498489 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:680d4e37a8f44822e1231adb51fdb31fc72cb736798a9df7a85f8a430578e289 +size 1001284 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f306d7e117b56fba2057d7bda684e94ad153a2a5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49f5407716f190a1782959396e59f59d1bdd2ab28ca15fe36f7f6461599fd809 +size 2089984 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ce0e73fcdaf7e6bf646b43821346492e62fdef0d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce6fd373ae4598ec7f465a203b45d451933d05e85e21902122f538303025220c +size 1011684 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7658dc5303f0bf7b81aab15eeb36e2de0ab169bd --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd9f3801b64a06db0256faa26cbfda91fdd26e3b592359f9f37e01e7b72083fa +size 16395264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ac4d705f8cb11ec29fd69fbc7d6497fa112f550f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dbf3e2d2ddd27fd51efab3be728c9d8a17d252fea1a7259a2f651bfca85b414 +size 103719863 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..130986a7acaa1084675f232743c68a6e0d7727fb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5ce85354fd31ea09c71344aff931fcc5b9bef58985efcbd154de3460bb86a1 +size 33383424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..43397a4009843017c0d7061ae7e43cff214a0e30 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed7be0d643335e6058f2d92ffa1945771e08cc45f7cd7f47b717fc1d726202a +size 1019874 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1dacb8f1a1cf29b2738a14fe7d9ac6832f12359f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8f76d266f4773d01f0ba2794b6034f173adbbcb5a0f23af08d7be54b2eecc9e +size 3380224 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..06e6dd5ab32b7f8f02f200c41d3a8a93055437eb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0881c64617785697e18707b7379a84aa627c580adafd2cb07b43857bfc97e19d +size 3564426 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a5c809e3365004f1d6d6e11ecfae8c33037ccd50 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be6b9da599210d66fa929bf93ccec3032ff8dd9f40ee58f39c752669622ef61 +size 1758623 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3fe955084587e53d1804180d075f21fac90b7cea --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfc3e691adfa79d19b3cd94616cfb53686fbb2ad37d9f8b0493c94ea0b2aa5e +size 14767104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..532c053e1fcff641f299d4f0f06aff57a234b9df --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ccd35ccb84fbc610acdce660f25bb465fb7550eec55b906c6d91639a5a49f5c +size 1380280 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1c7a49f574bc71bb03d9b1d8c09000c3460dfbf2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d9bc118b79f3eb64ec5072c7e25d3ab335ab53a7d62a1c95c550559278cf8dc +size 6032384 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..fe05377d7ef190a67977a0be313216923be2b8e2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b306c1ce56f075b2c6cc356a0606d0c1ad1e39049d22778dc449a9a75503fb2 +size 6215967 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e5ba0c5efb0f3aae3ca738c52388fbf977a7de41 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ad8897a3fe17be9577943cfecea97993cbcfb9146131df839592c6100f9f99 +size 404905169 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2a92ea1148a9b0cc37e747d9d3d78a08cd4558a2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d257d67593d9ae8413f9cfa70866a6c7a17d684d4e9751b72106961490e17128 +size 12575744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..eaa330708d3e2d02db4e3f7a2f406e5bf02e409a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ed34a4b552b12d702e8ac979a0a1028a7b8cb4de1bab47df482893476ab07b +size 12915025 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1a92f0306fdaae497b6ab32f9886d343780bd0db --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8a711589356c8692658082894779fd7604804606263594924b5578c58dc43e +size 922776 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..61ba945a8a9f8fcc076af79d64adbd3a37389458 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8357cbcc340f1eaa9570839ad3fc78ef16e789e87e8ec862707ff5acbef14814 +size 16661504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..45e0fd2d2ac21d03c22a31c6b9cfb30bae43b66a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843f42ee7deeb46136363f223799aec7b46bd8058b84ee4fadc2889f1fc425cf +size 104032199 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..827afed8dede9fafcd2983e34b27b28465a63669 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42bb442076b0964e1202f8269a4768be294785451d84b625585dc9150f454450 +size 33537024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..76fcfe1ac9e2cbd1c81651ce6299625465aabd86 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:407f935149f119f2c4164c2d2938216d252069b973106eb3e92e093934a304df +size 1022685 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..57b05b79d9446cc9b30913bdfeb897d3a957c429 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d21046e265bc255cf9a5752c980c317bff3bf1d628e6ff41e4e22a2009b296e +size 3933184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..72eafd056ee35f75fb3f8c055b8a7bca703c3f33 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af8f092d4ab4208c9e22144c5268b83c6e2d845fbaf3d49072a4cdc000cd4c6f +size 4117950 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ea6d0b2d03e12e6e6709+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ea6d0b2d03e12e6e6709+24129607/model.hlo_module.pb index 9e779f4b1c0c11a3f33e945e0936d69d5974be17..4cc121674e1b04f4d2d99e0aa8b9e8e8c64a0d98 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ea6d0b2d03e12e6e6709+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ea6d0b2d03e12e6e6709+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf764ba754c173a8ec6b7e81b9c36488cfd998ba9fdbab2f299010e295e8e4af +oid sha256:f57a73d4a4b580f6f0395b68546ae8e34540d683ca970228abdad1435325b436 size 726714 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2cd5f0b9949da3c6280c02c994221dddf5948118 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8448616698b0ea92f067f1e7d3039182dd6f06943b79ec50e4d116ceae482358 +size 103204945 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b0dd3ec419571cbb56ee20c2ec3d6b5c669bc023 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bcadad12d350f3848fad7dfeac2c454be4e675aa2f035ab116cf9a94454f167 +size 7721984 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..baf861771f0322b71f58eb4c2c287553c6ed35a3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f1d43488040924c9d2bd51cf89f431e515b370b1767db4466f91411343a573a +size 8061265