diff --git a/.gitattributes b/.gitattributes index 965f17e16b0e3526ddc28e8ec8a96e0a76a8a2ad..3356c0e1c32140792d1db6c5d3a7baf2ec32d5e0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -17062,3 +17062,44 @@ neuronxcc-2.21.33363.0+82129205/MODULE_f17348c87fa73ad85ce1+909b2bae/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_f17348c87fa73ad85ce1+909b2bae/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_fdc7603ab4547da30e1f+31905a9b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_fdc7603ab4547da30e1f+31905a9b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/31629530ce956e3eb55e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/31629530ce956e3eb55e.json new file mode 100644 index 0000000000000000000000000000000000000000..6d1b09f73e7c148bc81beced1669bc4a8d7a9021 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/31629530ce956e3eb55e.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/6b7aebc14c435ae3db63.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/6b7aebc14c435ae3db63.json new file mode 100644 index 0000000000000000000000000000000000000000..18b3dd814a9962d736002ffa618954a880743a90 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/6b7aebc14c435ae3db63.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/9208f2d66062f6c60fad.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/9208f2d66062f6c60fad.json new file mode 100644 index 0000000000000000000000000000000000000000..8e55762dff90832f765167ca8a5ae096eefb452e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/9208f2d66062f6c60fad.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/a421ae2ad1770f32b078.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/a421ae2ad1770f32b078.json new file mode 100644 index 0000000000000000000000000000000000000000..05936ca3f56d389a525adc1ff8e114c82fedfa97 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/a421ae2ad1770f32b078.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/e912866445bcdd7bd532.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/e912866445bcdd7bd532.json new file mode 100644 index 0000000000000000000000000000000000000000..03661a7fc14cabd01210526826a45c47c1b0c931 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/e912866445bcdd7bd532.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/1171cdc1e0668ecced1b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/1171cdc1e0668ecced1b.json new file mode 100644 index 0000000000000000000000000000000000000000..83a4adbcfbfb3249b20ca9428c1d2b331cde40ac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/1171cdc1e0668ecced1b.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/08703cff93ab144d67ec.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/08703cff93ab144d67ec.json new file mode 100644 index 0000000000000000000000000000000000000000..61016ef546228554d1539f4cb349f2306691b0cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/08703cff93ab144d67ec.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/21c3bc6d9846044edf76.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/21c3bc6d9846044edf76.json new file mode 100644 index 0000000000000000000000000000000000000000..0c0a148ffc6238c3a5d99ee5f8236a0910cf2bb8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/21c3bc6d9846044edf76.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/2c6cc66c4776ff9413f0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/2c6cc66c4776ff9413f0.json new file mode 100644 index 0000000000000000000000000000000000000000..819d571280be61027273725f91335958f3175073 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/2c6cc66c4776ff9413f0.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/43be67542ceab3db3486.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/43be67542ceab3db3486.json new file mode 100644 index 0000000000000000000000000000000000000000..552c7e318f62973392079489d891b05f4f01307c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/43be67542ceab3db3486.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/d57c7c9107ac77a642d4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/d57c7c9107ac77a642d4.json new file mode 100644 index 0000000000000000000000000000000000000000..c9687bb9caba74192794967e389ad292bffc2cc0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/d57c7c9107ac77a642d4.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/4b6c7416e39455d1ad2e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/4b6c7416e39455d1ad2e.json new file mode 100644 index 0000000000000000000000000000000000000000..edf2e32a3ffa22640ab356b9183dd230e3eae002 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/4b6c7416e39455d1ad2e.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/5d5179e2ac0b724548b0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/5d5179e2ac0b724548b0.json new file mode 100644 index 0000000000000000000000000000000000000000..d58e4598a93c1e6ceea114a81e737328e822c003 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/5d5179e2ac0b724548b0.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/8f8e8c8f417393bfd04a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/8f8e8c8f417393bfd04a.json new file mode 100644 index 0000000000000000000000000000000000000000..5b973fa16a70a24bf31d9634f4f43bb7fc66b3c3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/8f8e8c8f417393bfd04a.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/7536b906773aff032659.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/7536b906773aff032659.json new file mode 100644 index 0000000000000000000000000000000000000000..5121aba3b4503fdf7c5117d098655ca0d41dcfd9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/7536b906773aff032659.json @@ -0,0 +1,135 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/87f1aaa5ae1a31bb7adc.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/87f1aaa5ae1a31bb7adc.json new file mode 100644 index 0000000000000000000000000000000000000000..5ebd30127d8aef646a275d1ef9f190184088dc00 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/87f1aaa5ae1a31bb7adc.json @@ -0,0 +1,135 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/b2c32dabac1b1c034891.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/b2c32dabac1b1c034891.json new file mode 100644 index 0000000000000000000000000000000000000000..c90a85816810d974cc024c0ad117b6b124a959f8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/b2c32dabac1b1c034891.json @@ -0,0 +1,135 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/7e378b918d8aa52c31f9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/7e378b918d8aa52c31f9.json new file mode 100644 index 0000000000000000000000000000000000000000..eb9f1d0cfe4660390dbc84c7aa8524434aa1fb55 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/7e378b918d8aa52c31f9.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/ad0f2997db25e16e085b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/ad0f2997db25e16e085b.json new file mode 100644 index 0000000000000000000000000000000000000000..52d5acc13998240384df38ecf2eff69f3ef4d042 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/ad0f2997db25e16e085b.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/947cd8633065b84ee800.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/947cd8633065b84ee800.json new file mode 100644 index 0000000000000000000000000000000000000000..1ef1f86912ecc66fba8032b4d3cf1e104168de8c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/947cd8633065b84ee800.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/c3b960b90a5e3c592c04.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/c3b960b90a5e3c592c04.json new file mode 100644 index 0000000000000000000000000000000000000000..5468e43a9e8009954da45a28115c7bef18104469 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/c3b960b90a5e3c592c04.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/d244406bccd80288e991.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/d244406bccd80288e991.json new file mode 100644 index 0000000000000000000000000000000000000000..a55b49da2a9045011fb3650a640da380dec561cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/d244406bccd80288e991.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/0fa4ed4626958e306002.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/0fa4ed4626958e306002.json new file mode 100644 index 0000000000000000000000000000000000000000..066fa7f445869c7e1b1e889759bb2996b0a21b0e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/0fa4ed4626958e306002.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/124119d9f41b205009a0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/124119d9f41b205009a0.json new file mode 100644 index 0000000000000000000000000000000000000000..f418d2a06be93d78bc0cd40102f5d69a20a44ddb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/124119d9f41b205009a0.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/80f4956a4a5775280772.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/80f4956a4a5775280772.json new file mode 100644 index 0000000000000000000000000000000000000000..fd48c09ea37cb8cb7c9d71611d18c5b711e244c6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/80f4956a4a5775280772.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/9df84ad1fbfa85b8d13d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/9df84ad1fbfa85b8d13d.json new file mode 100644 index 0000000000000000000000000000000000000000..d476a2aed0827d204493ab60dccf43f587ecb6f3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/9df84ad1fbfa85b8d13d.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/ce4667775a41bf1523e4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/ce4667775a41bf1523e4.json new file mode 100644 index 0000000000000000000000000000000000000000..07e6e2cb452ac9d37ca47710c7d138a238375903 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/ce4667775a41bf1523e4.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/4aab0cb19b60948b084e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/4aab0cb19b60948b084e.json new file mode 100644 index 0000000000000000000000000000000000000000..202130a0db2dc520a85aead12163a8475ecb8c0a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/4aab0cb19b60948b084e.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/gemma3_text/unsloth/gemma-3-270m-it/a421ae2ad1770f32b078.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/gemma3_text/unsloth/gemma-3-270m-it/a421ae2ad1770f32b078.json new file mode 100644 index 0000000000000000000000000000000000000000..05936ca3f56d389a525adc1ff8e114c82fedfa97 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/gemma3_text/unsloth/gemma-3-270m-it/a421ae2ad1770f32b078.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4aab0cb19b60948b084e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4aab0cb19b60948b084e.json new file mode 100644 index 0000000000000000000000000000000000000000..202130a0db2dc520a85aead12163a8475ecb8c0a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4aab0cb19b60948b084e.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/ibm-granite/granite-3.1-2b-instruct/31629530ce956e3eb55e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/ibm-granite/granite-3.1-2b-instruct/31629530ce956e3eb55e.json new file mode 100644 index 0000000000000000000000000000000000000000..6d1b09f73e7c148bc81beced1669bc4a8d7a9021 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/ibm-granite/granite-3.1-2b-instruct/31629530ce956e3eb55e.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/llamafactory/tiny-random-Llama-3/5d5179e2ac0b724548b0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/llamafactory/tiny-random-Llama-3/5d5179e2ac0b724548b0.json new file mode 100644 index 0000000000000000000000000000000000000000..d58e4598a93c1e6ceea114a81e737328e822c003 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/llamafactory/tiny-random-Llama-3/5d5179e2ac0b724548b0.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/9df84ad1fbfa85b8d13d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/9df84ad1fbfa85b8d13d.json new file mode 100644 index 0000000000000000000000000000000000000000..d476a2aed0827d204493ab60dccf43f587ecb6f3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/9df84ad1fbfa85b8d13d.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama4_text/tiny-random/llama-4/d244406bccd80288e991.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama4_text/tiny-random/llama-4/d244406bccd80288e991.json new file mode 100644 index 0000000000000000000000000000000000000000..a55b49da2a9045011fb3650a640da380dec561cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama4_text/tiny-random/llama-4/d244406bccd80288e991.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/mixtral/dacorvo/Mixtral-tiny/1171cdc1e0668ecced1b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/mixtral/dacorvo/Mixtral-tiny/1171cdc1e0668ecced1b.json new file mode 100644 index 0000000000000000000000000000000000000000..83a4adbcfbfb3249b20ca9428c1d2b331cde40ac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/mixtral/dacorvo/Mixtral-tiny/1171cdc1e0668ecced1b.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/yujiepan/phi-4-tiny-random/6b7aebc14c435ae3db63.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/yujiepan/phi-4-tiny-random/6b7aebc14c435ae3db63.json new file mode 100644 index 0000000000000000000000000000000000000000..18b3dd814a9962d736002ffa618954a880743a90 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/yujiepan/phi-4-tiny-random/6b7aebc14c435ae3db63.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/Qwen/Qwen2.5-0.5B/2c6cc66c4776ff9413f0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/Qwen/Qwen2.5-0.5B/2c6cc66c4776ff9413f0.json new file mode 100644 index 0000000000000000000000000000000000000000..819d571280be61027273725f91335958f3175073 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/Qwen/Qwen2.5-0.5B/2c6cc66c4776ff9413f0.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/7e378b918d8aa52c31f9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/7e378b918d8aa52c31f9.json new file mode 100644 index 0000000000000000000000000000000000000000..eb9f1d0cfe4660390dbc84c7aa8524434aa1fb55 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/7e378b918d8aa52c31f9.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-0.6B/c3b960b90a5e3c592c04.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-0.6B/c3b960b90a5e3c592c04.json new file mode 100644 index 0000000000000000000000000000000000000000..5468e43a9e8009954da45a28115c7bef18104469 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-0.6B/c3b960b90a5e3c592c04.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-0.6B/43be67542ceab3db3486.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-0.6B/43be67542ceab3db3486.json new file mode 100644 index 0000000000000000000000000000000000000000..552c7e318f62973392079489d891b05f4f01307c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-0.6B/43be67542ceab3db3486.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/ad0f2997db25e16e085b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/ad0f2997db25e16e085b.json new file mode 100644 index 0000000000000000000000000000000000000000..52d5acc13998240384df38ecf2eff69f3ef4d042 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/ad0f2997db25e16e085b.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/smollm3/HuggingFaceTB/SmolLM3-3B/b2c32dabac1b1c034891.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/smollm3/HuggingFaceTB/SmolLM3-3B/b2c32dabac1b1c034891.json new file mode 100644 index 0000000000000000000000000000000000000000..c90a85816810d974cc024c0ad117b6b124a959f8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/smollm3/HuggingFaceTB/SmolLM3-3B/b2c32dabac1b1c034891.json @@ -0,0 +1,135 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff index 8de8b1d38cc51cfa0b83838fe7900fa02f776be3..25c5f8adc9ed466dd86deea2d0194c555b9064a7 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff index 4bd1073dfa13c2f8c5a95783acd01b92eb526f0c..e60dc116d27a695eeb8a321002afac9476845eb5 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff index c8665b5d2eee35cf94f564efed08c3ad800c7666..4928a9f81e8469866188df10eeffde299203c419 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff index c7e28f747b628dba642489b3dcc0e766a14fa4ab..f317d53ef7a19a89aa3599ffc10302cf9f73cf3d 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff index 8d2eb5a812c07dbf8ccc20735c6316ab728670fc..ca287acecce4525205e9bdd0599614cecd84cb9d 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff index 8ecdc9cb4f9873ad10de523798862b7527999edd..23147b3443478344ca858a6b11469ccc3dd53212 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a90369c76025677d5e855d5488b792afcf41108 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5afd8357e767e0c937818a5c3259924093801ff6406b49d123d4267c371e3ac4 +size 61659 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bfcc60730354cf914d19ce4da0fc6a08455e1fb4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c9ea2a1608587632f8e58f8c2619eec3f946ebd846e75a9c3e4c1b32a799a35 +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff index e5db2ae035f5ea95a4bbccd4cf7150372975a5d0..ebf7a3603cb4824d6cf9d6a9f667969d5b8441ec 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff index 325f47f8de2978e26329adf136b9914c82f15690..71eb2b6b0c92d92e7f32f003ec362f1ce8d86068 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff index b2ad83b9c383fba27c1e06252d99c6f913f09813..dd40c929024e6c86fa09d02cf889ddd3ab7ce7a2 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff index e8ddd7d7b06a7a2a01c55aeee4dfe192c9cb157e..44b8374c10ff16c82c77d4da2c2f2556f5b26984 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff index 50d3016d13b99c8cb172a0a52877280c505585a6..232fb4366813beb01e128e98d24b97b516339ce7 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff index 71f5f6a0c045f2f5b1bfacb15a5f176d52081cff..64dc08b906e859236cd51803a221a751758cf6d8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff index 49fa6490212017543a61d5b016355ec03d1bcca9..9098a7cda29271a44eed37cf5a1005e485ff465c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9c277888420f00defd99fc3c102007a98b09199d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7e17bd0deef25864ee8c64da9e5f7cf173b76f3e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5405ee9c8f41348d0bd893b5e48ec19122bb8043738550e95ffb2f5449876a4d +size 866871 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6935257eceeb0b33304b254bdf5ea279fddcd8df --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_137d2b1e9d185656d517+6170d8e1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8bae57e4f136528379d6d32bc9abb7a9db46a13c031d168a46da79d7fc674e +size 29758464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff index f7679594c8fb16bb7ea393fc162ee1b8535bc536..350ffa613c0b75e9c6c41ceff724974cba5459f7 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff index 4b95150a7450f8285c303cfed7c1e4003dd305e8..6c3564b191116965963ff5c5da62b4f25331ce07 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff index de582ec7ebd8fe3cc1cc41fbb2e6acb84b780fa0..63454104c8ca96a0e7897dfbb21ac8a51887dc42 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff index 9428c2d107899b1faca1dfc6dee3b98de92b69ca..1c425c8ff6a8e2f004fdad0ef80d7682acdd6549 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff index ec8fc0ecb210f6db92fcb3591fa5f7e06a2ef021..29199cba1f74464a52a0c2ad14ad22b404894a85 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..48a344041b5fe0453f0326ca4666d641b857b848 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:634d4aff7362c268d4f3513547f9254e6db1d74fb7af805564d70bd894f8cddf +size 62419 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d9609bbfc314bc3f26e1d163a17e58ffdc7218cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adf01ee775d350dfa5975c0e6c03f7cb5d51c49021907af57999cb614c380d1 +size 123904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff index 8a82614ef54b74e2476fc4545bb51559d5e7387d..fc5743172a455979404e2344682cf9334873ea66 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..59a6b813d1fb4121c53c2541229d751a1edd8090 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17eb59e11bb8aee29f7c2774caf5537451a7dbe3c5e4192b127a63ed0548ee12 +size 62419 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..da2fb8e54676b19a922791892357a2cd7ee3a285 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eed4b70b26e4f84102dcd8b84a44b5e053ad5e0c1ef9fc81cab40166ee4f02f +size 113664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff index b425ce926b7806740ab76e3ce5cb6cce5602a58b..665466270e66991df45c004ce71c351ed1b0abeb 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff index de37898f3641e46646b8241f0f7b17af7e843d40..cde2b6397921ba5a39b82589dd6388e00f90d7ad 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff index f1d9415e149dd2d8baf57e694124da14edecb30f..ebfb991636406eee12058a2a207b89a440fbcf1c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff index 784bc769a1274dc0828dfcf722a00511f7cc22fb..d5180aa594f76046bc627e05e772030a20c5d564 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff index 2812f46e3183dd812f9d7e5594aedc46397e00f6..64ef364e4fa03bd33ef1ab8cf649f5aff688ca6b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff index 6a7cc357e118f49ec004299936b9fe11826588a0..d071f32d5b13f36e524a496269f342b5b65fbde9 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff index 16be3d835a2792e574bd9aa9fbdee22d2474cfb8..5d995694c05232ffd68ce3d3545c1fa2fdb822da 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff index 39f2cf202b0a0c253dfbcb0878402a8e5e75f4d7..8b27719034d90e173db270895834211514dc6507 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff index 5d5fbaabd8afdf9ec6e46859c1ec53c94a12b9f5..21c961e8bf98960de072a2fd2b1d110bfafc717c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff index 6b0c24b930ec1d4aba129cfb72d61da11aacdd84..78c471f951cd455a97dc06be00075a2ed980128c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff index 346fa8283889fa3ffa56ed20c08706786b220aaa..a855ec3bca9a117793f06a39091a8c4c2572d1a1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff index 12b26deacf6671ec8202a50fa4b1f01c7ec7d094..05f832f584fb714f6faceaf69aa2b57d6848f868 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff index 7c3132640198c8435d9860374c729e3d96020fc1..13bcc679ffd0fd48ca6c1a4e1071f01e0f09400f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff index 0aa9cb824e0e41c0a8bdc5ab621621e1644e39f5..e686e3cda9278a0bbf6b4ef63a55eba9e73d6c7b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff index 2c2b8f37d9d5d68514df1e35fcb62bfd35e6f465..65f641c9ca7ec6a8a8e73002c0d4b564f0b36263 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff index dc8d438e87970da2c50793b8180cc3a0a205135e..e53592674319420ec0b935bedc74d61afcb4e3e8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff index 9fdc0a3db456ea557fdaf0491bb73368638ed913..e5b13f1db1dc2be5368d712bb5f41e4a2e3b65a7 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff index 8bf4969771f8751cd3b0d204b04222c8a792a500..87be3eb7a3d21d751c95bb17ecc9cdff505df7ea 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff index b4a1e2b1dade32fd9be914789463cbd294cf0d4f..264deb8fcd176288838c1236dfded458567ebaaf 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..33a2342c9d24f4e4f30ebf8e15aed8e686e0c90a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10aec6163a2220dc0eaf386dbf1178e07df1a712fd50b018fe14148cd7e532bc +size 593552 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2a6cee160931a299bf997dbf6e0c99262152da0b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18044ed346a94d78250a+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7812a21744e91c1f6fb470437568f2cdaf1dce297e0bd82152e22bc757b1c52c +size 3769344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff index d8456a69765bf31bc09bca1f4a7947aedb32b1de..563a14cabada0cec6d1a8a67b6a7b25ec17029a5 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff index ec02e574c7bf9333f97f2def63b5125bc208b151..693b1a61b56a89cb6c00509ef62072db9c9ebacd 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff index f6b7b583689626eda2f2564cb086d350f15fe2c3..2f31f624aaf5297188e8cd1ded909ef48980d23b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff index 8c32705ebdaabc1a4b631abf224562b5d8a648d3..6e32a1b23343e792f875050cf5a6adb446dc4930 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff index 9388422a87e0bc0c423b43f83c3daf565e89a25c..02d52e4fcc60f5b538654c47b2bd3bb90d894bdc 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..d7920635c9a7e9461f6059b07447f11403757c4c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_3e67b7c6-6838-4b93-afc1-c480a0337182/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9a0f667bc778c229b2631841e05bcc8d42b705d8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ce13aa478f17ff3bcd96fc61b0352a2fc81e62b29f20a876929fc5acc1feea +size 103424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..80d4789580444af4f035450dc4d349ce7b17054a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1adb90886791031c9f74+06b21f21/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524ed39c197fcbd18e9ecb4f035c13e12dc346c0e80489b2d4debe41f2c8b45b +size 104320 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e9f105c4c740bad977cabf7405cf46532b76f19a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7be338875ab6145176482ad294a969a3604fb2169a624fb7f85e6fcf10c3650e +size 64999 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5db27c0b698edbb015b6bedc2715b4d95b2fbe73 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2060474045114402138+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7c26fd0310e7ba7c55c83b375c05960971fae967057db291ba424058f1d8a0b +size 308224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff index 0505d95319792450da8d3975b021ac1048234938..0526d6db0b4626ed2b9a16b682ea0e09f3a8c79a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff index 59d5df4e2a3ebfd2fc7e4de17f12041cbb77404a..7d22556726ed959e659daef78ef2a6adedbef436 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff index e3c2f0551c5591e458efd12ecda6584e38105073..50fe4b7d3fbca84f8e408935697267edefed7d58 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff index e64e8021e59f50ca6efca516cb430ac620a27f75..689b3142ccda53d99c31da4b87660a44297e886b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff index f8833658dc8f92e141831d0a7efc935b7a03a986..ea521560cb51127bbc46c409aee713b29db8ef19 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff index 9788622208eef9b673ae906c1e7ef87f39c84773..a1416eab4ecc7c8aadf738d63022436893159718 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff index 50dad35b972323ce32ab7cd32bbe89d7353c813e..093f28daffa23b2b37f5c3ba613dc0975f9c72ae 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff index 03b63112bdbad2c501c0f3e5cddae9332750a775..180e336da33bb6acbc948c7607953f9ee8821df0 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..027980adf632b7582a8ab93d31c963aab9b84583 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_3c954a4e-c8a5-41f1-8096-447d85ccdbb1/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a08f56750e89f23c88870a39e1797f558ffc883e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e20c34c8595d6b35f0f837a9c878355216ef581a388ef1ab8b36dc7313e9f5 +size 11280 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dc511659c17218316fad569446864ad55dca593d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb1d606c533b2fba2be254d516e2b135a32ce0bda5e2582421c25da1f56ad1e +size 1444864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2890bb3ca6bcc6c3b6f221b59d721a6d9ffd6359 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2fda55dcd99cd2bf7c01+74e4ea3a/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b57fea67a504629eccbaae5f48b4580bd60f34195b54b7e72447d7cd0b26ccc8 +size 1447845 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff index 2386e91c85230c47489476284ec12a76034e61ae..f59ce0c3412d7251e1fe30fd2716e6fddea7f030 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff index 7bb09c481047057b182dc14b3784d57a70fc1ed9..d5060e675858177aa981161fa9831ebde7e9e8a0 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff index aa4394042a2dd1e4841dd518a7e548a6226fb764..328469a63237dd08664d8ed874e286d61f40bc8a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff index 2b6fcf83329fd664414f488e1eb2b3585f3f6f93..458b4ba0ca7e05dd0ce7314babc41171c0d631a1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff index 51cb2861ed2da4cd6b3b120a24fac964808d4b98..cb2f493297a8c9ddd9225154fb45393ee9580419 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff index 60cd4fb0cbe82be1f4084bfba754ae9bd7fcd6be..c5c1cc2827185536f650aee5fcd205e5fbce040a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff index 680f95acc8cce9b9d9c91c8d4fcb4915167bd5f5..f8313f93e8590084a8da5b0be6ffca992c9ed96f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff index 4add089d561df619c660d684590cec9562333163..8ae890af380d61ecff08fa64d2e979de6d7102af 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0f71040757dac9b1899503ae6be7435ab4359ff7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e10811f3cbfceca4afeaa5a5ba50606eff9b24705bbc9559eaaeefcb9b7e8a6f +size 575211 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..355fef2efc72c35c30cd1acdcc924bc40e19d95d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd628eec1476064a8204ca007ff40fbed4cc23f378e02ecfc6227569a072e88c +size 1240064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..55cd842377b6008459dbd38c4e97ea2705d18893 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_47bbfc2252c9a9fd3c68+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7eb8e831982ef037ec0b1ac31e6a7ebad00c4984d69d380c768ba8c01a0075 +size 1362453 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff index 616a1b3556bc9c95c661dfa30b25cfdc1f56222d..1ccfc86df64a0a1d03b7e204af8fc79bb0403f15 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff index 81dfeb673d15fed2ddc0f1fb718b530e02416584..d1b9ff04798e54d83f25edb3d300338ec5dafad7 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff index d985143e7bcfe801e15653a5b25ab48b71fb9535..3e17a532163aa957c59bf1d6896f51e3181ec47b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..df7d301050ddc10b2abb000b7f3347e2a64a34aa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9859a1dde3f34be917689060814f410f3f0c415d5e60a0f7d0b41d419e7e7211 +size 66367 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2c3c1c5366705e8350ee9f3b0a4a660bda0abbb4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_522781774461263035+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac10e1761913d16ede748c9496a728d029ebeb45f73c94a4db2c1701cf0755e +size 216064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff index bbe82ae9e027b123df37545f6a45c7025b7423cf..615feee2f8637d4251597a505192fbbe69b4ac84 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fdc2e262e41e67fb2ff5a25aab1c06bf82c010ba --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:797759eca70afb4ae800ce701f57199b1401dbe51ee6b554466789d938c4c37b +size 571822 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b4d020eb6da2d99c5496d5219b07d43fed5de8ec --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5620f0d58954f88c2890+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cac46ef4ae79281922f276f2620982a3742e6e3c1b7872ddcd81422d481a155 +size 1905664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ae1923b11befef7f4a5c1f767547cf80f0d1a3e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_95b964db-6722-4f7a-af2e-b53fa33e357c/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..307f90c8e8aa2efa59b5a7f04944e13f8a7e1b5f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc96f7fb7fbf64514a818a779dd383411b8c502b328094192490c454e498a825 +size 14480 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5912c948d5731d91b6ce18087c824ba011bedd56 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:218a030f56347553aaf49d0d03b37ac6b88f70948a780b90c9de798e63755462 +size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3aac18044810eee3f1d1170115fc6d0efb08bdd9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_56637414cad2eda298a8+066be99d/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff5b5ffed1d7d0e89245cdf5e5912418c0b7796c4aa7d6642973a8abc503285 +size 272962 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff index 18fc33c39a8f096600df41934d1a94357c2c7998..c0aca32df0b76446797630a82cb007b7e1c1c024 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff index 4afcff3e5435b44d7c4bbcc0e1c7cefd4575c452..0414eae4024db53695887bb30e6f9c1a1493e1e1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fa73f48a42c28b68801126c4f6452b9f0df54b3a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_07e0b979-fbac-4751-835e-9ee2bf756be2/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..adcc8aee2ca2db0fb4f9292d55b61f7aca4c547a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80e7d6d10b9e57d7975b715f6b5048485e54ef4cdb92d5d73309fb5dadb7673 +size 8979 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..673241e93a84fea84e5687a61cd7f09fb0f7acc2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce83a211d91d27521a9227aefae057077973c3cc635521c706f2130dbe406a3d +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..31fdf5b3d32d242a0ae83de15eeca57821ac8fc1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a955c273582832570b1+8ecabf0c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57d752331924f84c2cae0150914ac0655e7e848e7df72442f2f9e1caa2bd4a3d +size 249608 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..73a7def6452a874bdfaa32e535b2b465be636b6c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_853cf8ea-c2b8-46f8-bdfd-3a9a9d22e0e9/compiler_workdir/SoftmaxWithMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f2ee42eb9f1aa790cfddd8e066da7c8aeb1214d9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f96487dad4bb02b98bf2c955fe59650a5fdbcf1d763fdf56ec412b62b5774c +size 5596 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e8613428f0c0836e158551a66766b5783fe31131 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_60380a9e11ed642e576e+6a76fc07/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff index 97c6ba667b243552de427c0a42995473c41cd970..1725b4fb7bec5dccdcd63351e0e59b608c554e25 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff index d2e85e31ad151b76d41f6f28e51537ccfd597d91..7ef519c2f63606cad00a85b82c54501203d44ad5 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff index 8535170960592b034c4d6c2f98ca73757c7e7b11..3600afcf67d812019190d4e71d3d478878913cd6 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff index 30f067e23e7ec59e33877937f9d3ad99f01e9470..f22389f57be9775e51d77705196e728775babbc6 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff index 48d241a1afab16a5d627793ac363c0b77b0ba69a..bbdbdbd9a529632461220a8d774f6a5a4f1df839 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..86a02e8a6b52d65d970eeaf3e0e50da02304de6a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e51fdfdae7127466be2ec79701008b720297fad35905d1ca7fe02a528931a44f +size 412170 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..05f31aacc47cf367c46349facb8dd75c9c9ec41f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_691f0ef9fca273527dbe+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264d323f483f1b288c66424b9648e9fd7f37c4ff2febad67ae2d0ccfa4526ed7 +size 5868544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff index f8bacfb08c58904b931253d3cff36ac38c306e74..1490e0abf693b847d5a18f5b75a0c29b4ce1ec9f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9c277888420f00defd99fc3c102007a98b09199d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eaed824e127a6bef5f2308aef3e2ff73696166e9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:631e2888f6ee71acf1c9eb784246d591937aa8de79993a6850177196c2bc8fa2 +size 1207180 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a31627a5135a6b8129ed724fb378b163a8494383 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_71833bfb10ff3967bd3d+6170d8e1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3da1c0626101d789222ba962ee1f2b30e606a10b59ead008350d5bc0a19e62 +size 58307584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3872b9438b34bd84feef7b0d0a59b95291e3dad3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e07c09f42746fc019727656a4d50e8514799e6c2d67878759a337dd59f3bb37 +size 57415 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c175d8c70eeb7ddeb7635b14fe29489f6c706292 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7247213647965899113+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc10a315d899b395be229c300d3ff8dbbbf0e4b3a11d4559d88f4c3eb47de4e +size 113664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..38fbd3b2eb00172f0cbd3f2ccbff8489c2b6e609 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97efffa742969f5202645c825830fb6c7fc9a7a81dd9d38d384ec033a20af4b +size 68055 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d7f325c935fd49afdbcadc648932ea0ea75e74b8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7671391308733910418+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42728e3200e430742631df86e92068e2d4bf62244a193251d99acf5d1730ccae +size 308224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..114ccac20c9a8b151de7214bfa21700752168df8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_90ceb9ba-5e9c-4db0-b2a4-f810fac0e5fc/compiler_workdir/SoftmaxNoMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..36897c006be58fcc135059831fe41ce96ad676a1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e542c76cf27c3f807feac47dd3f58fd4c308e8d9e932f0732774bee19d640d8 +size 3881 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4f792177b15361f8cac716087fa028989214bee4 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_76c00601a47c85b66aee+dabdb665/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff index 0c29857824cd3ef89e9ed88374f261b211c57842..6fa59059e0c38c22705822c128b5f2e34bb74616 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff index 05544c6de693f73bad182475d40f808888079643..5599f4a3c38ff954424c7dc52f1b083479a7436d 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f81fba30b956a75382105f8a90859f793a1a2f88 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b75d362e9e0a07fde2bac3a2dd8a402ff7c962743c438d25b871a7f38d269aa0 +size 62419 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b02cc857bb741a7b0ef9cbe50b3c29093061e82a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7975285533996118114+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f92d88de1670c51a86d2e78295ee4e7f67729dd0fbba0be13b0af2a1846bb05 +size 123904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff index 1aee8723cefa343d51a2f9d1aba7682d31339cb1..a817e8c43ce672ef728172e64dffeb5dc0cd8d3f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff index a2d7c861ada592b49eeeca36627b13ab0a22f60c..899d12acbb2e3b269d72e39d8e1ff85ccfdd89e0 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff index 7ca8c1044acecc9e459fb857ba09c62328b628cc..479db7bad5c106bbc6623f7f84aa46742f69c407 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..97047e30661eca84250f4d49f008dc8a892a851a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_9f0441f4-c1eb-4623-aa7e-fa2dab8d5cdd/compiler_workdir/SoftmaxWithMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1122d44653d734af6a44cd149690f70cdefd9b05 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9a258ce111db27dc5ba46ec4f9f6877c56b0777a5534c0eba14488ff9eaf298 +size 5596 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2093f6a371cf0c28928eb54c86684bb2bc69fb80 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_845729b4f5690967accb+84af1141/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff index 47c6b691c8fa319a7bd4407953ae0aae315ac34d..e053814e61e99b9f7c339944253b911f403ad174 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff index 5da7e906f49b677c1e97265f7dc3d1222b0279cd..0addc21a0f8e8b574755a91436cce5f56bd2baf8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff index 13c14b87216d3b61d8c5f29e567072faefe8997e..f920f5bc3e1ba576153eefa6a1a9cec3931a330b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f89b65d61b8e3d8b460420de43f1934dd63a6cc0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80d22b5bdefedc913778a25fbbb4a23b7497c950f0de5bb855e6224ff9dba32 +size 62419 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..05577e1e26add4e606273496c8da863656d4f818 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9176004544586841168+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40e22a43fc5b59e1277a5a4d1ce0ed25ce06a05578a6889c68837aefc43085ca +size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef86b9020d24b159071d43532b83b2d5406d2f68 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_d857d88b-f231-4f47-8ef4-80c9eb33987c/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e2473c54cffac949ecf31d3b6ec636c290545b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a29d4cf27aec9af90e53324eff9ee69e033679482f4016a9dd3e5416c7c68a +size 1584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..907cbe088c5cbc65b0041e5b33a66b956db3651a Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_91bac378e577371e1870+62fca7d5/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff index c0b2a7b79933d41585dac58c80daa2309976620b..91151eaed459bf5ba49104ec3adc51a2ec090289 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff index 67d98424885b8b32323b6545a2ed3f6da5a782de..97c6f613168190b63545439901881de863a3f627 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..af1a1f48a5ef3f69669a415d275780e8bae3db4f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bcc80b2ad2cb1e12abbca61cd3f324f6f4f564cf78d121d0f5fcd01305c683 +size 451319 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..edd73552ff8f4f9e8d8d13dbc1545c5f9edf4590 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb68e29ac9c2790c0764956146429d2afa0a88f59a7e87b54141271d789ccb53 +size 2509824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d7c716136edc57ba7471385f4b1306b5182a8df5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94dab80edb15bb2de7835eb2ba89df093fd885bdc38b96f0a1852f767c384202 +size 2583911 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff index c807d5fd091cd6e6d084a82cf28ba759c25ecdfc..bf8a26db8a698b35e731be97e7299f673e35bdef 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..b8b699daf4f9205851ca22f6163646c66478c2db --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_38ba0ce2-4d72-499a-a0a1-455f21581970/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b050545e85d1adc68c37499eadab4b93c066a5c3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6b705d66437ed91a243401b8b01711b8337e3147746669a22b30b46f47feefa +size 29532 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d4e1cc39aab00daacb5d4fff777497deeb641b7b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3634b0bff1aeaab8a97b8a905a99b0389abda643a4730af9e0b6d01222049cc +size 328704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5deb279d05058039b3aec92a77061066010a9337 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9ba42250590aa5875bc2+57a3a5ff/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d28c3701200b7d44fa8f8bcea27dc91838e7bc460d45b71228d67b6491d7e0a +size 334452 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7de4a3a07694ec555cee06a24b3fe5ee6f927983 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_4c6797e5-21d5-46d4-a667-0df1409ddfd0/compiler_workdir/SoftmaxNoMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c213570f630857303e7b0ec653018546c02dc983 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5efb0ea01dc0241314f91e19cbd9be551d1a66e7dbadd7cd4d6473d6b7d4b99a +size 3881 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..95bf4988e45d767a00539871f368ec6920d1da4d Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_a0eb63cdfef690daa7e4+901193e5/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4f3dd325b4bf515de91ad979b06f56f9afd8afd5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ddabd0c90ed641a0ad314822231050850400aa553214d5529473bf4ff0a305 +size 437679 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..89c229b1b1058bdcb2a2fb270c20a65af1ccb51d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11b69acff75055f0546f43508dcdbd542ee3eaff6195b6c57a3af7edf77dc6f +size 2438144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cfc412486de729dfb647d90574d4f6ddc623fc33 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a3c6a617f4311e09ad49+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b57cd0d9c32e1bb6bce78c9222799e7c01e0b73129fbb8e01387a02910df1030 +size 2512245 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9c277888420f00defd99fc3c102007a98b09199d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6aca56152be2b63949b8e7e51c4ab3fd05b96a6e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:679ce8c5ef949ac06470210b35e4adee33fe05ef0fa91cce2bd5b47cfb6388c9 +size 559974 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..51d39e33165e3898a8841721f54d4c0990b1e4fe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ab293424ba3cf7b684c0+6170d8e1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd865120e98300df7ed41b6a6657309ee50589d33e8f1aaeb76adb5aad9fac5 +size 44893184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ad3809a6302aee089919547ae91b6b9aeceaf9d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_c3082995-654a-4d0b-950e-e09f11de8994/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6da2cba66863fc471486cc2a5da22b16c00d6480 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993f99d2d09d34bd152af4c7f96a1e6e1d8788e1cd4aefb845c601d2f4d5fcfb +size 1931 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..877819b7a90f85615e1c266b330f49fffb6728e1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50e037f2d4f7dad0854127768c2435bcfbf01663c17860971c857e1c0d6665e6 +size 134144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..390f57cc80b55e9706e5bae23dd8d1928a169de1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b097ceba291117567f12+f6339882/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9c8cdd2bc4d06af106874054b94b0cea8ae5474a338173b14728ab926196df3 +size 136222 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ac3e6b48e8dba764cc288ca883c0f597c620535d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503f2ba00e01ec69dbbab68e814ed2bb73f646b48b6892b515292a585e9dfe2d +size 839610 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a8bb898d682d24a5f13acdcec6e2c7ea242e613d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c68ed9d0d3211fb37165+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9d8573bdda4cd25a8b90976246d32cdd841ea5e0890eb3aa6f7ee6a972eb94 +size 12626944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..59a076083253d9de16cfd51e1ee597ae9e8d3b14 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726697ae6d69106a35fc6caab871c35bccbf129873b887f75986cfbdd687e625 +size 588724 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9f21fb9c1b37177ab43fe11f01202aa3f2086d77 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f353a7f7b170b85e60abe2956848930d93fbb17b91c48a5fb5f89605979c662 +size 1659904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4ef8b262adc22a340cdb2a97c9e9131f4766b9c1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f61cb51f77618dd82308e979bbd1a94946a7e0d90a0a1d37e561c52ac3c36a64 +size 1782293 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..35918cf272a3b2b57c280dfab5292a07cb621c1b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78e69b77e5c34297018c01769fa5cb3dc76d5a1edeadcfa530419c770b4bc0bd +size 616389 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e09ba1e54270ae1f5121b2488e3be9a946af0519 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:440dff8141899e1feee0b232802eb84646ca15db0fdfedf5c6ac261952a33027 +size 1793024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d377cdf6bfb78ce7e5db0cebe2747f25dcfc4bd0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cafe5e1cc124bf1d35e0+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7e7c6096a78d91c83b5aa8a9e9a48f876e3782f8b163f6a69ea89dde69e9e22 +size 1949215 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c0b701e4164fd2a5e57ab18c5b330e8849286a18 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3122b05453bf9f572786aa49b7e5df0168024a8f52330ce2671cc7e157cd81 +size 904767 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b17d55caa1b58e299dee600e47ddd7acc15f59fe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6283f00665e3747e029a916a314a7185c78798b530296350881c6422ec1cad72 +size 5827584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c51d74f1722e036a88f7b7580dcaf479cf3d039d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d50f09930f3886ac4b66+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36761cca5a82603e8d8cb052c52a7b98c82217d90b6c06f094e68d980b82636 +size 5994108 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9c277888420f00defd99fc3c102007a98b09199d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..efcaab2c12bed35b75bda2df9c71132dc3a407f8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8039f37cf8f74df32eb6821df951e715cb9be5a7b6c9374dafd414d5ed8545bd +size 1538064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6b17c7a094f3eed6d7c1501f0916bc4fc22b29b9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1d31257822d52e37e1fc1b5d7630208f8c8b75bbff6ad6594c32a1335526791 +size 111862784