diff --git a/.gitattributes b/.gitattributes index d34a2134e49de1e9fb2773a74c9a3c3516334546..783f25fa4387612ce563637e53eb5be0e38e48df 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16583,3 +16583,24 @@ neuronxcc-2.21.33363.0+82129205/MODULE_fb7c14df8ae0afce903f+a02c3a36/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_fb7c14df8ae0afce903f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_ffb27087e7fee87434c8+10b1ec0b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_ffb27087e7fee87434c8+10b1ec0b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/7e7aa9f507e35c13dd8f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/7e7aa9f507e35c13dd8f.json new file mode 100644 index 0000000000000000000000000000000000000000..217e10914229ed9635f996d6a6550553aec3c53d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/7e7aa9f507e35c13dd8f.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/d747a4330602daca7212.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/d747a4330602daca7212.json new file mode 100644 index 0000000000000000000000000000000000000000..e773b7a06981f6111d5eaa9d733c94383fc45df4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/d747a4330602daca7212.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/87e4d7b9b66d2da2ff9a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/87e4d7b9b66d2da2ff9a.json new file mode 100644 index 0000000000000000000000000000000000000000..434db9dca119d5de61c5d855ccc91b9a5d5f2abc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/87e4d7b9b66d2da2ff9a.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/6f5a57eff96603155de0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/6f5a57eff96603155de0.json new file mode 100644 index 0000000000000000000000000000000000000000..b9cf8b68b24233a491c202e98c9c5c6fdce22b89 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/6f5a57eff96603155de0.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/76d7cb8b92e6056492b5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/76d7cb8b92e6056492b5.json new file mode 100644 index 0000000000000000000000000000000000000000..48b6289d780e664c88c021a1e90eb4e35e83bf5b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/76d7cb8b92e6056492b5.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/f632bb119dd8a1b24f5e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/f632bb119dd8a1b24f5e.json new file mode 100644 index 0000000000000000000000000000000000000000..31f90e6e0ca66a4bbff827d339f27d7f69bb2755 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/f632bb119dd8a1b24f5e.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/358bc3b8dd83586373b3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/358bc3b8dd83586373b3.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0ada61d9e48c72f192e7152270a469416c85fd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/358bc3b8dd83586373b3.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/0eb12d39b1e9ae9119c9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/0eb12d39b1e9ae9119c9.json new file mode 100644 index 0000000000000000000000000000000000000000..5432d724e669fa651edf0b20bf98e9c0958be303 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/0eb12d39b1e9ae9119c9.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/5aa97d9f724b404c833e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/5aa97d9f724b404c833e.json new file mode 100644 index 0000000000000000000000000000000000000000..63bd277cd8644efa2057673df39a753bc80bfa68 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/5aa97d9f724b404c833e.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/e31330a79f96b75c3b92.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/e31330a79f96b75c3b92.json new file mode 100644 index 0000000000000000000000000000000000000000..024a35ae67c4682840d46710e0b4abcdc1f093ed --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/e31330a79f96b75c3b92.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/611ecd616a9855ed5c94.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/611ecd616a9855ed5c94.json new file mode 100644 index 0000000000000000000000000000000000000000..09b2397bb94d60cf975ae361a5d361e5addbfb25 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/611ecd616a9855ed5c94.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/892f9abb8947c25db2bf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/892f9abb8947c25db2bf.json new file mode 100644 index 0000000000000000000000000000000000000000..21ba92c475399f0dd3142402fd72c5137ea1ae6f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/892f9abb8947c25db2bf.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/ac47df4e9fa0ab11045d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/ac47df4e9fa0ab11045d.json new file mode 100644 index 0000000000000000000000000000000000000000..74ebb9181e7acd8e77b0484a8d383e7d27bbefcf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/ac47df4e9fa0ab11045d.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/cf5f0e54eefd9f4799e0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/cf5f0e54eefd9f4799e0.json new file mode 100644 index 0000000000000000000000000000000000000000..b8332868a1232357136be2fd27b9bce7d00dcfe5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/cf5f0e54eefd9f4799e0.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/ed3370323d34bfffb99b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/ed3370323d34bfffb99b.json new file mode 100644 index 0000000000000000000000000000000000000000..27bb572ecb4e20204fa1d55258b5b808bbdd24e5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/ed3370323d34bfffb99b.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/ca162bc98de3faa6a1cd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/ca162bc98de3faa6a1cd.json new file mode 100644 index 0000000000000000000000000000000000000000..0c7f51b20d88a0a54a4af9a516b6770ccc5f6702 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/ca162bc98de3faa6a1cd.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/d47c8eab712558fa6cc9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/d47c8eab712558fa6cc9.json new file mode 100644 index 0000000000000000000000000000000000000000..b39e51b56f29d18ab99a959b466834b04488c2fb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/d47c8eab712558fa6cc9.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/fcb385e634a21f77f57d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/fcb385e634a21f77f57d.json new file mode 100644 index 0000000000000000000000000000000000000000..15dc3f59fad29c90f20b6c79cf3c4c66b57ef01d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/fcb385e634a21f77f57d.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/7f19e74badd6a1dfeca6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/7f19e74badd6a1dfeca6.json new file mode 100644 index 0000000000000000000000000000000000000000..c187d504391bdefeb97e2f06b1186b371373c43f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/7f19e74badd6a1dfeca6.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/ae24b43fff19bfb9e3a2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/ae24b43fff19bfb9e3a2.json new file mode 100644 index 0000000000000000000000000000000000000000..e37cd603ddeab5ccfb7113413c02bf2092a3b7a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/ae24b43fff19bfb9e3a2.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/dcaac38891c5f3741dbf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/dcaac38891c5f3741dbf.json new file mode 100644 index 0000000000000000000000000000000000000000..6fb7df8c59a31ad97377f73c4d58395b29b14e40 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/dcaac38891c5f3741dbf.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/d2937f050f6af743c80a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/d2937f050f6af743c80a.json new file mode 100644 index 0000000000000000000000000000000000000000..4c34f38c38bbad6cb16bb1791581b2df81e93828 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/d2937f050f6af743c80a.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/530760a6f19f555b1e54.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/530760a6f19f555b1e54.json new file mode 100644 index 0000000000000000000000000000000000000000..24e9fb4f8cb32f14ede85659e1bea8b1f14cb56b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/530760a6f19f555b1e54.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/e4a2dbc4e1473e9dd12b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/e4a2dbc4e1473e9dd12b.json new file mode 100644 index 0000000000000000000000000000000000000000..409e902efc0480106661e0c8c61b2bc18e0e53df --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/e4a2dbc4e1473e9dd12b.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/26d2bf7c5a58fbbf78f5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/26d2bf7c5a58fbbf78f5.json new file mode 100644 index 0000000000000000000000000000000000000000..e05926bda5dd783f234795f5e7297d6b9e199c5e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/26d2bf7c5a58fbbf78f5.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/2268dc095fac8097b02a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/2268dc095fac8097b02a.json new file mode 100644 index 0000000000000000000000000000000000000000..ff7cde52445626d4ecad7210b6a09a85d170b610 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/2268dc095fac8097b02a.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/89287ea57822b502eea6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/89287ea57822b502eea6.json new file mode 100644 index 0000000000000000000000000000000000000000..f9654d73ab20e9bceb218b4a7a005cf2f23720d8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/89287ea57822b502eea6.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b906aa10209b714ed9c0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b906aa10209b714ed9c0.json new file mode 100644 index 0000000000000000000000000000000000000000..bd8827415272f32353129c09693db442919ac8d1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b906aa10209b714ed9c0.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d097d5805242ccf7e23f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d097d5805242ccf7e23f.json new file mode 100644 index 0000000000000000000000000000000000000000..20ec951daca7ea54e781e978606efc0899c5ee22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d097d5805242ccf7e23f.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/db821ec893cd35f8b945.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/db821ec893cd35f8b945.json new file mode 100644 index 0000000000000000000000000000000000000000..a22c2019eb330eca26ca05bc08f20460d6df346c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/db821ec893cd35f8b945.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/d62ebc09c7e82a133b65.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/d62ebc09c7e82a133b65.json new file mode 100644 index 0000000000000000000000000000000000000000..13a4257d464bd1611725dcd18be762e29bf353c6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/d62ebc09c7e82a133b65.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/gemma3_text/unsloth/gemma-3-270m-it/76d7cb8b92e6056492b5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/gemma3_text/unsloth/gemma-3-270m-it/76d7cb8b92e6056492b5.json new file mode 100644 index 0000000000000000000000000000000000000000..48b6289d780e664c88c021a1e90eb4e35e83bf5b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/gemma3_text/unsloth/gemma-3-270m-it/76d7cb8b92e6056492b5.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/d62ebc09c7e82a133b65.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/d62ebc09c7e82a133b65.json new file mode 100644 index 0000000000000000000000000000000000000000..13a4257d464bd1611725dcd18be762e29bf353c6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/d62ebc09c7e82a133b65.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/granite/ibm-granite/granite-3.1-2b-instruct/d747a4330602daca7212.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/granite/ibm-granite/granite-3.1-2b-instruct/d747a4330602daca7212.json new file mode 100644 index 0000000000000000000000000000000000000000..e773b7a06981f6111d5eaa9d733c94383fc45df4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/granite/ibm-granite/granite-3.1-2b-instruct/d747a4330602daca7212.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/llama/llamafactory/tiny-random-Llama-3/cf5f0e54eefd9f4799e0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/llama/llamafactory/tiny-random-Llama-3/cf5f0e54eefd9f4799e0.json new file mode 100644 index 0000000000000000000000000000000000000000..b8332868a1232357136be2fd27b9bce7d00dcfe5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/llama/llamafactory/tiny-random-Llama-3/cf5f0e54eefd9f4799e0.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/llama/unsloth/Llama-3.2-1B-Instruct/d097d5805242ccf7e23f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/llama/unsloth/Llama-3.2-1B-Instruct/d097d5805242ccf7e23f.json new file mode 100644 index 0000000000000000000000000000000000000000..20ec951daca7ea54e781e978606efc0899c5ee22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/llama/unsloth/Llama-3.2-1B-Instruct/d097d5805242ccf7e23f.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/llama4_text/tiny-random/llama-4/26d2bf7c5a58fbbf78f5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/llama4_text/tiny-random/llama-4/26d2bf7c5a58fbbf78f5.json new file mode 100644 index 0000000000000000000000000000000000000000..e05926bda5dd783f234795f5e7297d6b9e199c5e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/llama4_text/tiny-random/llama-4/26d2bf7c5a58fbbf78f5.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/mixtral/dacorvo/Mixtral-tiny/358bc3b8dd83586373b3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/mixtral/dacorvo/Mixtral-tiny/358bc3b8dd83586373b3.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0ada61d9e48c72f192e7152270a469416c85fd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/mixtral/dacorvo/Mixtral-tiny/358bc3b8dd83586373b3.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/phi3/microsoft/Phi-3.5-mini-instruct/ae24b43fff19bfb9e3a2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/phi3/microsoft/Phi-3.5-mini-instruct/ae24b43fff19bfb9e3a2.json new file mode 100644 index 0000000000000000000000000000000000000000..e37cd603ddeab5ccfb7113413c02bf2092a3b7a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/phi3/microsoft/Phi-3.5-mini-instruct/ae24b43fff19bfb9e3a2.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/phi3/yujiepan/phi-4-tiny-random/87e4d7b9b66d2da2ff9a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/phi3/yujiepan/phi-4-tiny-random/87e4d7b9b66d2da2ff9a.json new file mode 100644 index 0000000000000000000000000000000000000000..434db9dca119d5de61c5d855ccc91b9a5d5f2abc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/phi3/yujiepan/phi-4-tiny-random/87e4d7b9b66d2da2ff9a.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen2/Qwen/Qwen2.5-0.5B/0eb12d39b1e9ae9119c9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen2/Qwen/Qwen2.5-0.5B/0eb12d39b1e9ae9119c9.json new file mode 100644 index 0000000000000000000000000000000000000000..5432d724e669fa651edf0b20bf98e9c0958be303 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen2/Qwen/Qwen2.5-0.5B/0eb12d39b1e9ae9119c9.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/7f19e74badd6a1dfeca6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/7f19e74badd6a1dfeca6.json new file mode 100644 index 0000000000000000000000000000000000000000..c187d504391bdefeb97e2f06b1186b371373c43f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/7f19e74badd6a1dfeca6.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen3/Qwen/Qwen3-0.6B/e4a2dbc4e1473e9dd12b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen3/Qwen/Qwen3-0.6B/e4a2dbc4e1473e9dd12b.json new file mode 100644 index 0000000000000000000000000000000000000000..409e902efc0480106661e0c8c61b2bc18e0e53df --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen3/Qwen/Qwen3-0.6B/e4a2dbc4e1473e9dd12b.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/892f9abb8947c25db2bf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/892f9abb8947c25db2bf.json new file mode 100644 index 0000000000000000000000000000000000000000..21ba92c475399f0dd3142402fd72c5137ea1ae6f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/892f9abb8947c25db2bf.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/d2937f050f6af743c80a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/d2937f050f6af743c80a.json new file mode 100644 index 0000000000000000000000000000000000000000..4c34f38c38bbad6cb16bb1791581b2df81e93828 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/d2937f050f6af743c80a.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/d47c8eab712558fa6cc9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/d47c8eab712558fa6cc9.json new file mode 100644 index 0000000000000000000000000000000000000000..b39e51b56f29d18ab99a959b466834b04488c2fb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/d47c8eab712558fa6cc9.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff index ce389f6ee7f5ca999b4c3ec156df11a7c21f618b..3e34c2980ade69a1dcbdf8e51d89aaaf03d6c963 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff index 0c067051b9b06a9c4a29aeaac166fbc5bd682c81..94e503153a287916fd28fe9aef32c4c4151dbaf3 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff index 24a5075f196c19a4838de8f5ea5a0b5e316c5c5c..4afd85112aafdca0bad8c3279dae09aaf5dfb428 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff index cb5e772ca007779ff0d2b2d6d91b150286c109a7..38a73f6cfb3877e6bb78bde6d26152b2dd55569f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff index d94e84527bec711725c28871ca3875b3e55cb9c2..524ac606e31e910b462136c03fb3bc1dfcbc2858 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff index 74b131b3717b664e3164d1496e192247ad9c49d4..d1d315a125b585e67d1ad1d7b9ee1b6f15d1f168 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff index 1e4e82123a6b33c1d0acd5f7c6135f0acd2ec1ba..8ee8108e85e0aa04e983e233156a02dd149c1aaa 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff index 962cfea2fad13edb7a3fab9456705e063266911c..5e76821f3bc5879affa5ba117c789b715cde9f23 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff index 1f998cc36495fb635cef0ab2ac713cc893ae3b12..8f33f6e4cff05f2588f38cad798fb91c9314d8d0 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff index 5520933555d71afa31617f39cf896bf6155bc35e..b8fa2dd295472d599a0b17f753684a37768f9e31 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff index e1c8028ad9e0f230d8e46b3c4d0709d6eead8dd9..7bf545041fe4645f55df110cb23585368a05ad66 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff index dc532e55c226733b461e4b52bb8ab472cf7cc9f8..3420bfb9c6605094925028360840fc69afb37ed9 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff index 0ea785a1ec32ad28f324e1bc0dae08c846bf898d..afda08ee0c8111aa761538199f2355dc2222a35b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..80fd03abf03205c03b071fe2f46ea4e2bde808a6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_fc9ac3e5-7d36-4a7d-84ff-da9fdc37c0b8/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0e51cb39a846e4f58cfe3d5ec50ea34e7babca50 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c233e7013daa344cb368079ec790fbf10f597bc7f8708f8c91ff6684cdc1cd2e +size 29412 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c321ef25fb2493f39264e33ab48d71aee2e7002d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e5bf0a248f11d5eeb9973ac4e7c4230f8bbf2699711ad08aa79965a13227ee +size 328704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..72b548f20c7287e3abdaa3e83c78fb9dfdd36f00 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_131b92a29a4e826e3b6b+856272e2/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a50b1aab7520bb338defeab4dc9189cad2ac50530878f265add5a4e3d2dbc258 +size 334452 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff index beffbf9fb084e9aa8d8313fd5de4037d35a3ba42..6877b4e004a8fd429dacbf0c4c195f66184965a3 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff index 119fc22567dae304f3de09077aed5fbfb863b804..92aba1535f88f3aa36e315e0d34f4babd83edf58 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff index ab99bb19f6ca9b276e4ea36158e1649c63a36dbd..c695d10ea694597a5f836a20bb33199270f7e2e3 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff index 6cbcd6933af9f4d0cafa9d8f205fd0edd73c0fc5..4eaceba6f6ed01c901ff1d1ca927883ac16c1bde 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff index 96d58050fd4d0fe5804cdf24b33a5e6cb5f24668..4331845a6c13fe24cc72177cbc445395aedecf01 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff index 47cd7212020441cd80b57a5663373b1d096fe04d..c60c4f4de7ea200bcc007bd81dbcd58ef14ce813 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff index b1cf459f7da8284462b155575a0827404c3b3312..3c16c750c25551743ed51d8fee31b570e1e853a7 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff index 147a706f6a211395fbc36b49621f17ade1a4470b..f3c334b13638ef45c83d2dc6e295ffed31dd1d85 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff index f8c5acfff753f101541bb5ada2b8841f591e1762..f531bb624e28f3dfae8a4ae0ffe5ee5b30c77d23 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff index a34d28ab284ad360f7fed2e919076e03e5eaabbd..6b05605e29cb12b7d4b8998a468ff4554018fba6 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff index 9bce0441a7c0e6adef00dc0100afc2b764e4ce3b..3023dc4e990d00a11ca7e339d82d186be9c90c10 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff index 1f9fedf392dae3147cb35b2a74ec5cb12b1e20d5..c1fba6e674a392724f9131a7347f4744d99ddcb2 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff index 4375eac39be9f2df340d3bec34efb7158892528f..8b0ae39312f828f284f6ec71e05204a0c3dfaedc 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff index a59dba8f3d2ec6722bdd2753db4fcde9b04696b9..04666f926997f4653009ea7f63578d6a0eb9e60c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff index e2cf21d099f705b93cbcf200c4afc35092bbe2f9..cfeadce60e8e54713c4a559b98eb8fd46c801ab1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff index 08d37080c52b510ecf491dda92ba2ef1bb00da24..bc4b35b433a56f9d4ac6d39bfc5ff38528300d88 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff index fc56ce276dab4240834d6f20a081c4e1a95782f8..447ecfa1b02993d988bf86b8023b0f83ff8e401a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff index dc2055b22789f8ecaab7190991e132243a6db068..497ef42aac8d0eababc9bebcf69aa0df74fcecac 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff index 0aa807ba4107aab0e25d9c6e65025abd4e3dea1b..4aa612e25df7928495fa30f732e08cb19f45fc62 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff index b0e1f293b5b2002c6836d46a3538900a8c7c5ebd..ac7809b46bc53d7175f07072a976b394c803fd41 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff index 63a82e6a29b880a62df70dd620f5d1f495dc8120..38e1d2275f79df310f00ad7ec40d1e22f8446a10 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff index ab5114761e8276d190fe546cdea19d4610f8557e..beceb51ca517e679535a11799d475cd9227b1c6a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff index 13435a3c01088a5b2c2fff32d0e744f0ee7b3c6a..4346f4b419928b8b00d6fa493a399aeb300abf4d 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff index 3b6af0b5e0e6269a880a060549c43453384eaaa2..335873944d950adefc661f5ab799297533b25ffd 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff index 055a596324ca9dc35d5544c7b44e90990e8945c1..284277094d772b94567460091de68dc048d8006c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff index 16454301701bcde1e8cc715e4bf84698b69012ea..d808d4e502c1b69380417e1e21e69c740c4449df 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff index 803e0a10e285dcf8abe42d38d622fe876169288a..36c6edd3d83841bcc1e0d54b9fcff9621aa4ca32 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff index f815bce910d6a0d94398f5f22af9aae75443a743..014069929c2228b68b56387b85554909ef1902ec 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff index 26b29a9be04059d8a80aa290d5e0a57ee06831f9..63e72692a003b9ba35e2551994053b673badd852 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff index 6e79bf06b1d256e4c700f7ad9d17dabcd16ab6e2..6965229004e0570152d561ab363f8a7f33c15bbe 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff index 7222e5b32b8e52ca25947e2786069a64be164001..77243f36bf1684adce7f7f6c30b41189277003f9 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff index 2cc8da90b0b89bf0d8b3014ea26acccce2d373ee..e8775b3de5ccbd9cb0d4d9b1a20e274788c1465c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff index a353cd772b6725cd5d7e4fc321178706a2a4d9b9..4be93b54b187e68e3b7df7f60d4ed156c1c66554 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff index 04a0fd5dd3b10e68168cee9e4dd9616c9cd703e4..52fc9d82a6b0450b6528f5f6193ef1652a5cf8f1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..6a272cbc3522e4a6ec0ece911e8e900140f96fc5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_c533db6e-4082-4ea6-bbbc-200444a81865/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b7f9f80de01bab3afae30f106439a7600609730b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a4f4cdcd60e648ee1c6f483fd7d775daeec9d14a980ac23a3446e8df4ef38a3 +size 8979 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5343adef6dd999be9a9c279e000af03b0305272e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d22ae236eb2220b92a24fe4653eec9271165dd7dfb2dfe04a21337b4e3971e3f +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bb5d486a39377d5e840e03d04fd6e934dd0495e2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1e869bccd9b21a627b26+0284df81/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9ed3ae4ce03fa5b6ec846c2ca45c5fde8b8c835a1d0bb56e906623c87c4ba42 +size 249608 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff index 79ec7ad37450e5751cc756b0276e677ccfdf7930..e0a8c6c2117864c56bd0bba9165d85b68f969313 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff index e29c574e4a6eb669c7d3ae8d9d8a76dbc55ffbf4..3a76d39a6278a193c3d712e05682b6a01a8e1392 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff index b9d2f0ba8e6d73ebb942f822e663cacd7d4e006d..a54a8dd6871783d1ebac2c4d7a9616b292b2206c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff index 185387156ad240d206a81b6590d2ed3ec96c7ce0..65a510ca4889e820d6d030892636957b0a9587b8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc18d4e98501b92c2b673e90a75b6eb0f915c91b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_5623b148-533b-4217-9a86-7ef52518dad3/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b923001a6560020125fc4e810c86e51e072498b7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14143475c70452d88c564a050dfcd562e6a4f290a530c36044a340bb635beca3 +size 103424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5c6ae67a6e41fde4b54c16c599044ce61a92c185 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_24ff57b14953abfc2504+63690fd0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:758fb323a4e2946f0382a737b09daa44ed6549b6fb1b093e483341b950453f17 +size 104320 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff index 4b750aa8b53af122b1d94391ead420ff0e1be025..e98316277cb98efc3a722242025eb358ea4c16a0 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff index 5e4d63c64bd2b102082a4d020ac8f49d22dc586f..47954bf2f12b5aee90a2c822c3b444c2fe06909a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff index ee95a9f8db253a7eb7067c0ed962723857b357e3..b2bed8d8905566df58161d4b6e69dd38e8e91c85 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff index 6c8fd30f48a9308bd3ce0812ba58e2733bd7c766..1728d386a0b989c0d6b989420eefe8e6a1fc01d9 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff index 7af28a5e5b79194137c161c54a7d73cb80ddbea0..33980202cb922eb8192bc3c1565dcc7c1f555b6f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.hlo_module.pb index 1b8352e1ee9da452fd713c1a756c86482df2737e..5b31a795fa051fe6382001027af513aab21cff12 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e635e3fd90e62f2f22ffdf53f78d7bdd4fa50499755a35b844ef028afd5c49ab +oid sha256:9a587d5ddb67a42533df39b1b05311d022b9cb500122dfc7b382e9f459a2b148 size 593552 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff index 4d54a8f5230685ebc07cb18782cfe1c7fa625159..aead2b3226d482bda7c015ad4d479b20d6fb5559 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0726acf4cc677c120be41423a084e8a63c6cd2f76cbc46cef819c6225e26ec40 +oid sha256:da3045f0f7af6c2235fc2428acc84add9712c1569dc34e19b016a9e2f1a980ea size 3769344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff index fe9f78cc1a4eb7fc059eb79b0b908a3ea8d22381..96f49bca6dccb1aa20c7c38b9b46e57fe7152c74 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff index b6dfe6f386dc36d0358348ede311fcac0aa63aaf..51c2ea245cd9f79c0fe705c6f4d5b09d635c655c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff index d2d11f31c3d6cd56b71b61d758a29b800250d758..4b17c026f321b138997918acae111f001ce0cb2c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff index 4b148aa8d87c9a4f338ae5a3012a6aaf913cf963..5261ccf43b518357ad89932c9076edad4f67a12e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff index f997d93d2bcbde0e0d4f1d6eae9f0bdb2f7c3ebd..cd96b0905c4cff7fd81d1dd4ead8b5586d3a18c5 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff index 97b7e575df22ba17b7b76b72e4597fcd12445b5f..8a0d3c5cb4b2ccec8da55057d1b408b01b8cf0ee 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff index f186ce4e0da50506e8432d6d996ec64f4bce62ac..7050d73a0f8ace8a3fc70595f4d9b4d165aea28f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff index 5a50851bf6edba5e3adbfa70347fcad61a3d5968..aaacc1c7d43f2bed931f5f435e11c043dbb77cb0 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff index 5d31af3ff43fae4714ce14dbe40d402cd5638ebb..9944a440631105219131dfe4c9e4c3fcc1b8dcbe 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff index 75a2e946eddf5f9fa0ec32305d9606c10457d912..7fd21fcc81dfdeae164ff3a2b21fb6ffc8514b3b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff index 17eac0ad2f0f5929ecfc8d3e2dcfd1cad982a748..e11224ec542fc6a357e804a3cc0f99c8de6ce41c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff index 3706ba2dee57ec08a3a65b566a5abc38eb0e46f7..93c0980779695d339c0888ed12917bd7bc1d912a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff index 6a1710e03d9a19bb39737ec54107e6f993e62578..502fed883bb7e0cc853af1724f20c7b973f3df56 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff index 2282a0ab22ddcdfc52a9f523874b30d89de2a5fc..3b8b479b40f7076ec8d71689ee6689e130b3f2ed 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff index 21e0dd9ed568e18a09a9930e271c5f5252e7c2d6..a6b42ce07a0758b1480f5b33bb20e4c334b40380 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff index 03fb77addc20cbf58c625956b75f0766cdac2cd4..7936be0b32383f09638a0834a34b44340158feda 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff index 3919312938fee79cb23495f7709008cd669390d4..da3e3422e0bb327195ef1a998e23e67a1b4cd6ca 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff index 6fd5668a2c5fdb5098664ae4875aa1fbf906ee23..39fff34bc8d71b38bfeff8d9346bea1e19d7a87e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff index 3df5774b2876d1b11ff087b3c35e0a9f26aa31a3..51e7dd1a7aeeb6497c26194890d5f245049c3b04 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..034545387ac747962a7097e7b7928f9eeeafa73d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:526e8478dff68672c6091200f03d94fa784d760b6af9495a5b5bc52bf238d9bb +size 569536 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8ce034e49a016afa0dacd03cc6f49f0fc1a7ce2f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cdc4e858377a179e32b2566e433818bdf91ddda5ddfe5151dc0e78101c9066b +size 1373184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..890dda3939a198b7833ed252da83a32f591d16a6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6a21c8e3eeb9125f1a9c+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ee85311c9e3891aa28d60adf06ff5fbc80d7036133daffe4922823d320286cb +size 1495573 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9bc400a4020a6c7ee652c1e38703f40fb160beb9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_a041d777-213e-4863-a68a-ff61a0e75ed0/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e2473c54cffac949ecf31d3b6ec636c290545b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a29d4cf27aec9af90e53324eff9ee69e033679482f4016a9dd3e5416c7c68a +size 1584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e6f998a8fb25f02a6aff7e5b91842aa62f02e132 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_6b862fdfb3a017795535+aa7ee666/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff index 9fbf4379320658d2d9cb66411adb84d4a42a90d0..a93ae187d6afa5d1d82f2922bea29a68a052bd68 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff index e9d6f85bd4755a21c63c3833d600b3f2793463b3..d3e444ce58ba9972dc3431955f36a41eda1882d8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff index f8c45356800dcf8d94acd56476b59730be7fde16..0d6729c29003825dc883a5ad28975f453f58cfd8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff index 4125f327ea3b3d79cd8c81972ee75ef1e8ec5f5d..40ff123d4db3543e1eee73268b3ece50ad3534b9 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff index 5fc644da0317c1e8034fe646e204b97efc2a8ff7..c65dee50813279b7dbe666563a76f9750ba569ce 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..191e660654670584866b4fd469eab5c43823156a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76b5fea27844a058488b5ad978e2655ebaa9e4aeeb41759246d83ae75a64491a +size 532500 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a692c4b9978fa69f870bf8b9876a1291a55fcfb6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_81d6cd8b8dc031ee7a0b+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79ad3d582c2c3b7c434156a44157d97ad122bf9617ba55751c744dd6e7d20661 +size 1813504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff index 879078f137de986050d1c965e5e207289185ebb0..e167aa87c7fd64041cca9a70ac63fbe87f32a90c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff index 6a3efb0cf6a3d4e01bdf6bae1f8c744dc1409183..512fe55750892ccde9127f57c9e00c67a4f5092e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..f524860920c61863df72bc2e5afbd66e6818130f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_21907fdb-6cfe-454d-940f-1a48fd28532c/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..618358c2b96cde9d0fdd8a5363f4e3756584720d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a756345a0e47699f8c540a039458d52f4072f93c3d432124a626815c5383b0 +size 14480 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b74ea95989485cbb2ed6eb01226b05cfc87c93e5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98faf2170cdcd59554e2bd77f7c6b5753f589ee5b2fc365c8ec2878ac863489a +size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3eecd3b73883a7f559ce55597d2e00c266bbd6fa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_873a4a74bd5c79a9b582+94842355/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed55c0b1eb2a2e971d348a399167e78963fa6c6a296982b717075c2ce35ec5c8 +size 272962 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff index 0c1efd6c75a87297a6ddf17535231ee74296196f..23d5d7e6154801644cca994278618d27c3951fd2 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..d227054c74505f8eef508677f4882a66cecef622 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_756e9d69-9d63-481e-9b95-e5d3f2922067/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6da2cba66863fc471486cc2a5da22b16c00d6480 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993f99d2d09d34bd152af4c7f96a1e6e1d8788e1cd4aefb845c601d2f4d5fcfb +size 1931 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..20472006dd6c8b545bff60608b6bd38f4be1dff6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f3ce920eb4fd23251dc93bd78c73607924ddf561ceb94966f0ead594cdba94 +size 134144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..14698ec94d46d114519907ccbb770ba5bd7bbaea --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8c9f593bdce078bd1baf+44aeeef3/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce28d9de26280cc2dab6950ca40665292605152f52dae452491ab756db27fbd2 +size 136222 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.hlo_module.pb index a6adc8e21ca678546dd3249064e046ddd75eb4a5..be952da347de317d364c85e97ed45978ec10f0c5 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d845214d2a88cba3b82d220ba69115b804dd26fbbffef6cf554e026f84ea4e37 +oid sha256:395529a99f71d2c53ebdea1ddab9d3c20cb88f4a53eef1dc2f557c1d2f43ea83 size 474402 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff index bae76b33e9cc22b69ef04c7ba24fea1c3f8fe5a8..6d1a4e87872478e4413248eb929d1e999c32196b 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:80b27f8229ae4dcb41c0f874d091377ed61e6a8ff9d7a8e277b1995049a713b9 +oid sha256:ec3b3a30a58e872543f04aec56ad92e14636838909cc7af96bac1c3dbfb159b7 size 42322944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff index 5ba39d6f789f24d098b37c2a21c4e0f850320c2f..fc9d1287a6cab646ee15a6e2921108e6d02955c2 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff index 8b9d3af69097296936ce8332f2b3b2a07fb26a59..3dbbfe0002a7498b16cebe5589303a99a865b77e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff index 3c98d7f54eae48906350d2e0aa009681cd71d4ee..612249c69427750828b77e59094ed9b33cedd3c8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7994fb722c0cdb017d8d0b9ccbeee9ee0747cdb7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d87aedd12dd5989e8d84f53a3aecb58df8c31e5adc70a3682b0c33ffc3799e7e +size 532500 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a85b6537f05cac462fe59deecc6d3349cad4c1ae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_958d41f716f615f7fb01+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa81b8f7bcb32c5911e3bd7fe5883d54ba8e6da6cf13e02c30b845d91f9d37de +size 29942784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff index e0d86d89fb1bb745b253090edd3f771584d43baf..09659879aff5213ecd778370b340b450bfd39f25 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4f4fc5c1d2ed52325a4a21d425ffd86d61f339f8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:072ccb9fb7d86290e26a5375603e1afb662e5f281437419f3893421c85ff7bf3 +size 555200 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8ed014ff4a9f79ed0c9ec1747c42209dbe349a52 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc42ed98bd9349ccf3f56a1d05b45dd7294fbe19447af16b872c3ee219077760 +size 1025024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..659dff14a6a4db0ce973475c6b33f9edd7c367f1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9bf263b81ef3502ebaf1+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:173de181cbb3f31ba4535508bbe909c597112d81dd04e3e6a019544f88c0d76c +size 1147413 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..de774fb9d605df026790251916fe90942763ce31 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_a8cc083d-8517-47f9-8804-a5d42d1e17d9/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4bb45842ea0894b69b62a879499055cb6a5cb9ae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be2fc82d01eae877238b23e1fa62d9bd75d648e5ffdf58b8cf01e4f672c3e27e +size 11280 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4bb6d54db62c9b6dd9a89fae38fce039bac7f9ee --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18db0a97cbd4d819de01b88d101af19fa0b6e6f4286a8ca6f623efa0a7713c2b +size 1444864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..094f03fbd148a9a2affc7de759ea870414f35394 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9e68ef7eab52f091b941+300990b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f69c7fd9346d726d8d5f3ea5a94ef19c60f9a52568bdfff8b33969281ca3eb67 +size 1447845 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e88e5f508b2e222d4c62cbfed66b88dbc4edead6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cedde36e9447943a3efdf4ed22ba8e8377c18dc164c05dc9ec3b2404fd285d6c +size 562167 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7e068aeb44295640ce4a8095d7940c496a38b5ee --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b19d16e87edce0eb519472854464ea428c8c6621ce680463c7a0576186d64b +size 1793024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1cdaa38d5e41939b3540e043995ea8efbdf94f22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a155b32467ad91efdb56+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4364f3e30295be20e1b36a9f8b3877323fbdd44fe0154ef2aa2e7e559a991578 +size 1915413 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e91e4879362effa0cc9c993088b4a0bec29143d2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09002100cbee99f3cf9d355464c3b373418336fa7f26fce3379b075a8ff4dd8b +size 571822 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cf9b121fc54987dbe2c631e020e5778e3be54268 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e45e2c512692918b8ac7+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b94048b1e583bea84cb9e311cfe1f6298e201e617dedcd340fbc12ea64ab735d +size 9503744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.hlo_module.pb index 087adad3be38716000a37f13a671422143669eca..db90fa54202e67f9c2b63a9f960524c316db768c 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18a82b3beb86fc5ad8db17a90281a3d8d1e499f2379ca89b897d6d0517a715c4 +oid sha256:8ed2e09754ffce7b5957680d04d07b9b18166a7809582694d1e73f3af4b21434 size 590808 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff index da245e319bfba4fcbf7d138047d5aa6a7c1fd781..2e304f5f34c4473ccaf6e0d3745f4d1f6ee5e8e9 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:710da98f53b097220994920c0ac4bf7cd4d9a212534ca5fad35b49591a3a76d5 +oid sha256:6fee6df0d8e815c51f82b7158798b7add293f37bb80ac24f1c373fbcbbed9710 size 1547264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo index 072972696c1e065d6095a4f1cb7f3526629dc105..88a0d5d3f2831cf021957ecb3454157426d77484 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebab97f18ee35856177da37482f6a372d7eee951bb004294e0e7a9d388665df8 +oid sha256:3179cb717e4aabf2107e6f14781201172d17fb10c114b830d04a7472bf9b009e size 1703455 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.hlo_module.pb index 47dc969631220fdb40e5fe10f7bfae7a5454d613..1f4ba21c9f53777d279e086808276ab93c3b0615 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:018a62e9a6d7b944baff314113a3d60b4bee472c3df99216573823afea25c4f2 +oid sha256:6661ed9cdcff742b06a6c5ab009981de926925468498a831dec14ba213178f65 size 426072 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff index 46a19606011c88af071f3323cca20596bad3859c..2ee7685225d01109cabca2c77360e3dd24b7722b 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1de49bf0ff7551558300def87f8c627e1d6739f02c9fdccd8973a1ed09d101f +oid sha256:f6b2df4e44f711cd0e1f04d8a201cecb4eef47cea703205520508d24777b3bb6 size 2100224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo index 90c5852dfd79413c63a3aac71c566500580104e7..ba9bf7cd4202c74d051ffd415c643db6b20bf348 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34fc3d5f4618002484f5e1ec8c776b5a51794dfa743ace4c8d244723b29f356c +oid sha256:3c71e9d7a2ed1c070836de52d8db409df98b800fc373e8694162ad067655ae0f size 2174311