diff --git a/.gitattributes b/.gitattributes index cc1287896d5bb0bba7b4f11cbe7249fa6dda8ccc..69db5cedc03e8ffc47705da4aa3483a10d624ec7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -15051,3 +15051,86 @@ neuronxcc-2.21.33363.0+82129205/MODULE_dd366124647aeec64074+a02c3a36/wrapped_nef neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_2fa826853951fded06a7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c70db4c7be1d872d4380.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c70db4c7be1d872d4380.json new file mode 100644 index 0000000000000000000000000000000000000000..9ef0acc109fb18b45d0d8b59f21f14424323989f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c70db4c7be1d872d4380.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/75a72117d9bb1d65cc39.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/75a72117d9bb1d65cc39.json new file mode 100644 index 0000000000000000000000000000000000000000..2ad1e8f5287ec8d82249957d9c4c23a796a11c5f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/75a72117d9bb1d65cc39.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/836a0af195498bb3e459.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/836a0af195498bb3e459.json new file mode 100644 index 0000000000000000000000000000000000000000..2587ab9273c9c275203f028a101b33aa4c33e8df --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/836a0af195498bb3e459.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/llamafactory/tiny-random-Llama-3/29c1f48affaff8ffcfac.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/llamafactory/tiny-random-Llama-3/29c1f48affaff8ffcfac.json new file mode 100644 index 0000000000000000000000000000000000000000..697842c760a3f27d9d4eea86278075e368f5e58e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/llamafactory/tiny-random-Llama-3/29c1f48affaff8ffcfac.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/llamafactory/tiny-random-Llama-3/5c950e6de65e4bfcda8a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/llamafactory/tiny-random-Llama-3/5c950e6de65e4bfcda8a.json new file mode 100644 index 0000000000000000000000000000000000000000..4bffc239be96867f14f9eb63bb78fb75aa878d75 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/llamafactory/tiny-random-Llama-3/5c950e6de65e4bfcda8a.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/llamafactory/tiny-random-Llama-3/a41218e056c3cc880c9e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/llamafactory/tiny-random-Llama-3/a41218e056c3cc880c9e.json new file mode 100644 index 0000000000000000000000000000000000000000..77d45edc8a987496f918ec53547ed2629dbfd8fb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/llamafactory/tiny-random-Llama-3/a41218e056c3cc880c9e.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/218ac2136121c43bdfd6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/218ac2136121c43bdfd6.json new file mode 100644 index 0000000000000000000000000000000000000000..6136073dd6cb8b3b5be9c5a5f9f3e517e25069b8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/218ac2136121c43bdfd6.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/73d01a0f47581aaaee4a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/73d01a0f47581aaaee4a.json new file mode 100644 index 0000000000000000000000000000000000000000..d9f75c72a6e50636f02de93b7b0e905ca67f9eea --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/73d01a0f47581aaaee4a.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/f70ec29e56813798e705.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/f70ec29e56813798e705.json new file mode 100644 index 0000000000000000000000000000000000000000..04d752e945882446556bd5f7e1be3926ba28976f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/f70ec29e56813798e705.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama4_text/tiny-random/llama-4/6f976ae1dd1306830cee.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama4_text/tiny-random/llama-4/6f976ae1dd1306830cee.json new file mode 100644 index 0000000000000000000000000000000000000000..f465fe3135497f788419c67d550e0601f3c67ccd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama4_text/tiny-random/llama-4/6f976ae1dd1306830cee.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/mixtral/dacorvo/Mixtral-tiny/3c7054d9dc66bd2bac24.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/mixtral/dacorvo/Mixtral-tiny/3c7054d9dc66bd2bac24.json new file mode 100644 index 0000000000000000000000000000000000000000..a47d631eb8de36fae0833dfa44c3adf36de4900a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/mixtral/dacorvo/Mixtral-tiny/3c7054d9dc66bd2bac24.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/46e964a86faeb179e2ca.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/46e964a86faeb179e2ca.json new file mode 100644 index 0000000000000000000000000000000000000000..26e76b694976d98cad2b78345aa3f335a4db905a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/46e964a86faeb179e2ca.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/e9ef2a108b5cf4eee062.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/e9ef2a108b5cf4eee062.json new file mode 100644 index 0000000000000000000000000000000000000000..e2b9fc051cb1c585323dfb39b2be88c044ec91c0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/e9ef2a108b5cf4eee062.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/yujiepan/phi-4-tiny-random/24920d8a9e6a1b9184a3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/yujiepan/phi-4-tiny-random/24920d8a9e6a1b9184a3.json new file mode 100644 index 0000000000000000000000000000000000000000..c79805940bab072d8a65b0eeca23f96943261a74 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/phi3/yujiepan/phi-4-tiny-random/24920d8a9e6a1b9184a3.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/eab7ac727306f0ab8e55.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/eab7ac727306f0ab8e55.json new file mode 100644 index 0000000000000000000000000000000000000000..d28f14fa1cb7bc8687424a6a6814d5ec9dea5732 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/eab7ac727306f0ab8e55.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/f1a4bb21823df0476631.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/f1a4bb21823df0476631.json new file mode 100644 index 0000000000000000000000000000000000000000..e409d55c4120a79494cfa0e3e52670dc9af798af --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/f1a4bb21823df0476631.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/f2e11686d5e508eacfa4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/f2e11686d5e508eacfa4.json new file mode 100644 index 0000000000000000000000000000000000000000..c6577bab4d3c518708c08f04a7e6ed70636227ae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/f2e11686d5e508eacfa4.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/1e7190a04a0a96096a6e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/1e7190a04a0a96096a6e.json new file mode 100644 index 0000000000000000000000000000000000000000..40a777835cea9bbf035a79e5389bd171e522dc56 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/1e7190a04a0a96096a6e.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/60c1441f3219faaa7225.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/60c1441f3219faaa7225.json new file mode 100644 index 0000000000000000000000000000000000000000..f42eea6c2cafbbbfae104b4a1b1a4354528944d6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/60c1441f3219faaa7225.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/ce5e2780d37d70f0d7ae.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/ce5e2780d37d70f0d7ae.json new file mode 100644 index 0000000000000000000000000000000000000000..3eb8d4c9d9903b672d7a3d626085dddd4755fa85 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/ce5e2780d37d70f0d7ae.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/fa7742b85aa577a2889f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/fa7742b85aa577a2889f.json new file mode 100644 index 0000000000000000000000000000000000000000..aad93c69a7b30703806a68bc79ab88ef317ca23d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/fa7742b85aa577a2889f.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/177ef8d3b7a3fa2f05ba.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/177ef8d3b7a3fa2f05ba.json new file mode 100644 index 0000000000000000000000000000000000000000..9b0c991a99cc6abd18acf3690c8ff8cc68d79d6c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/177ef8d3b7a3fa2f05ba.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/669a072053d85bafa4d2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/669a072053d85bafa4d2.json new file mode 100644 index 0000000000000000000000000000000000000000..12e341f3479626b9c2e9e72cd6db68a0c8fe908f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/669a072053d85bafa4d2.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/e214f5dd416e1edbc7e0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/e214f5dd416e1edbc7e0.json new file mode 100644 index 0000000000000000000000000000000000000000..220c892a4d1acafe29454e5759be0a817479579e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/e214f5dd416e1edbc7e0.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/ef66b0455dece5e18231.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/ef66b0455dece5e18231.json new file mode 100644 index 0000000000000000000000000000000000000000..249cbfbb0a552950d47afcc5eb081a3d8f7cac22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/ef66b0455dece5e18231.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..72148463b432d9b8ccfa77aba1c577ab74ded5a3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_55efca96-bba8-4c31-aaa5-f22dad9250bf/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be5670c6258a72b3253c262c04b0a3c084a1810d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc99fd8fa13fb76a08ae04c8275958cd87878cfbf86f63ae613d7efcf775bfc +size 29412 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a6b86021a6f2d5ff6fd1c84cf278a1305fa74e29 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:930318af5369516c5301429db3a82490dc6f66bb83600765c2ac165773aaea7e +size 328704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6c0622001a2c881d96fcdbe0c443b6b2c7271bd9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00eca61d40aef5dd923c+126a7ae1/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad8176f336ed2ba1ce9df29c07ef8a452e3d6e1a3649cb4f740c08183195b16b +size 334452 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b64123213be94737cf87ecb9f29f0124f502c2c0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f2b999a60c82d86979d0f6b90a6c682dbc9539e64ebe06b9bb34e9f8065e04 +size 84114 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7f0008e69692af5905c91702b5801170977bb81a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6e2652844332eca1427cae990960fd98ac25eb584d577d01490580abbd0b034 +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7bef34ffdb2b9dd11501cb4c581b75b5835de43c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04605cea463a0181e7ad+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8771520a6caf1dc15ded687cdf16046be2d5b5d7e47d134e71deb34a171b7214 +size 254967 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..48dc490267aba7cae14f72274754f0ecd0551fed --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46c12c65b32e0206e9008ea148d2ec75cbea8a14b82619342b844276c66e1467 +size 83504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ee56526d6e4025ba67f7daa071baeb946e3f31a8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04737b0b7802227caf43+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20a798486c94a03ec13a55a103bbb0142389f0bd95f0920bfead42c5bed54ade +size 287744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8d267f32871c35f7db69ac7ac2c8e0d9240bd449 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2cde5baa98a83cf7d1f88fa3ed4d8421e81f106a689511fb8b313bf33c45b9e +size 761066 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..182bb57743e00af84228605f6ad173f3cb8e948b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_05584a522a6853ba0eb4+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c38cf073637de93c6a4c9bbd5b9773fcb49d5bc61b2e8b9ca9a78f59e54a29c +size 12493824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9aca70e88e36fd43d9bf4bc678919f56a1879cf7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7524acc2c61ca9b6b046acf11c51438e1276c64c121b3453edba59089b340e2 +size 1080658 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bd1bf0790388455e95d965c0476777472051835f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f21f6bbda57bf7e070cdc973f3fa7c2644644fbf240be8541b7065349c8dcef8 +size 6933504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8276e9897e2ab3b46c5bb702f34be69723c31645 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_089479b62bff8cacc87f+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10393c2cab5611ae66184955d37e5ee955eb6f40016323dcc12ac2c8d4bcc491 +size 7117697 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..06610e1476d74d2bec672bc3678be13a7baa0541 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07bcaf8a1f374dbd37d11df8fc015c6a3bdfe5ec169e15b6c6167b0561ac31cc +size 694128 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7c45b00a78f51d94e9c539ce3f04af1c060a05e0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_09cd9547033302db1d02+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a294f59751c68c5fc320e02c16ca0a4998a63238dc675c81a4f6328db0682542 +size 625664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff index c7b4112259ee2cce399c2dddf7be78dceef9cbdb..d83f8fcd876524a113e499d7f3cb0a5fd0819693 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff index d8174b6dbe4ce10431be53bdefc207e72dff8a54..676572bb15946493daa6cff52fbdf6547b1e9632 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff index b7e4f90fb558fdf30bf3aa76087ed1fa516ab162..a40a4c91e9cb85db12326728ce0f1d911ac45862 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff index 023de07e7274013ab7db12919096eb991eeda9e0..c634eaec464583002d8c2e6757f7b01c91a63ebd 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff index 545049854206f27359cc1bd5fa869aadc6a5e0cb..c3b91ec9c4ebbdb54fc677bd4f338583d81db903 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff index 21ce3c01928a3f4bf7efefb1116be5de557d3f6b..79e72b1da8dfbf1f9df7113adc5266d3c5baaaf1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff index a6fbe5a32bd289d62f385a559830d34e5dd0eb1f..64635c8c8c1f8fc016f9287bc3b24c1bb539f41a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff index a54e6f7edd43ffc2cc30259274400ceb2f04e201..3326e18ce01f778e6715cb0a2f301ff9362df119 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff index 33cb9289952bfe93220f85d7d8392df371ac3e51..f3c9dcdf081253148dd63ca1ed3038474cfdfc8a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff index 100e106f2d6d55f84766d832935ba3ad7b9df968..9dff22e850fd9bc8ce4f954d26ebde986c208ab2 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff index 93d75af4c4b75a2e54e3976bb89bce6d0a3fd0c3..e2085312a03a8b1686cb0f8908263de1179b1990 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4e86b88673a4962826d93c5a9e15dca102112d11 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a50160f95d4d88b8d0cb2caaeac0d394a6e3c316c2be404ff978c1b367431dfd +size 82653 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9e78ab8f78b3ac72a67a9474c37513acbf6f12cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:560e291c18ae0e13cd4c74b809ac9c0a69eed02b93b829f21b8bb430cad3976e +size 216064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d284d01b66e3616addaa0656ef82903494594189 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_127de87388b95258c57e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ed4c789494bda7c4193add75974fc27cbd033f8f3e6705fba96ed38f97e0714 +size 224275 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff index 54c0cbde5552668e51ae1053e0dc74ae3bcbdb36..e0384e84a97e49c822798b0f6ea171a5a175eddc 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff index f1b2633450f3d7c60b4e05d25ba5c8844d50226f..aedd56df1d60b80ebea0541359ee843e4a52cdd5 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff index 98788e96bbdc2c931e0f13027af88827a4a438d1..aeab1e3672215ced0212c4ee6a2ff868c450d6fa 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff index 9905193816866534ffafdd0f8609c775480a3d3f..dc67a570302c65ba9e95edf98027382334261ac8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff index a74e5a1492d8346f56fa607dc7181f729d7bd6c5..cd682ebb834e9fd1843a1c89aefdac177220c10f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff index a9c8604139b4736bf47f16de91e88b065c558190..e506729e53bd52f2ff86886b2f92eda86f9ed628 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff index bf9d5c60ce1449d59a946770c6597dbcb052f3ce..0130b8b9691d1d994a05fda6ccedceba3b47c110 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff index ec4f6ceeb9066960c91fdf44a124c05739db3cdf..3eedc01d080eefee5163b2f684e5300b79e3a9d9 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff index 733030d9fa1c0d131ec3519f5675d84457384c03..4be41d373e0fc331485582891e440a079f7f0b9b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff index 7d7affdcdb2b562e20b03adfbf7675d7b44a1349..d2bbb24045454f68515c1806cca31088f9534436 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff index 2c072bf670a78df288da4f1243a35455be679218..8de1901214edccba30f41c8c08188aa344b2edfa 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff index 32cec6780f8d7c2dcdbc736095c941bb77ea53fd..288a2664fa5d133a9655e54a056a445d3ab4bfc1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff index 3eeaa18a1f3ded4cc0149c978a223e2e20a2efea..8ec270b26f7d80647f3bfc3e74a089d611449813 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff index cddcd32fa67e27b7f995c345823ba30bdb9f51f9..35f414fed52680612044a5c9fd8ee595f185ae21 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff index 21e2c3dc958356108aa519c317c0612da804dffb..8e10bb7b04b5b664abdad97edd44359f61b800a9 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff index 2d9eff660c32a5e05b88996073e3d14fffac585f..1688c633cc4191181208259a90a706043d2fa579 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff index f26d9b82aad11e3a664db0585d82a183958c4fa1..0f1d8163e377b769cd223cb7e1a8689bf6b36792 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff index e848c6368b7cb00ff455cf514fd369a2a2dd2a1c..f4bba3f1ab646dc427f0757b9e1ff08898b24537 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff index 19a2b9cf1474b0ff58c93da0bf5d7248db11650b..bffd295e18cb13dd06e60e894ac78c5bc47a925e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff index 3e4ce47cbf2e7da9f0103ae8d2125103cd46b0de..ab036a3c44eab068edc5fbd34dea762b5eb9cfcf 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff index ccf8ee3fa0f4ca4ed0b20976125b358c96031833..a333f0d1447b324debedb3532bf11ad4f2a0aa95 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff index f8989c63e2d136dd6799047d7be762489539d16b..613a363c47bcec6fe902713a51bbb4b05f7bcd90 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff index 939b07713fc652311cf3c6369ff5278d8f306b36..dba05c511a86b9f725d934b64dc95e3968ec2e8f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff index 55b1af5d75c126fafd64bce00e65fb4d19112a35..dddd326c9903f2899f5b66eb27ae456dcee9cfc6 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff index 426df90b21e03ba85d380fa935870f33b4560ca9..80cce126ee84a364a008031edb40b17421f47b44 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff index 4bc8d0a0d6f26ec3a15822ce48d077b0f2d0890b..49b7767327612e21756e90b0fcb081c0de68cff4 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff index efc4db6b0b0e2b2ab97fa9b4688407887dbe375c..1b55262bfc4f12bc0eebbf2c97da6a89c2a1489c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc86b6948dca9cbc2103606d9a3944401ea9e697 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ac353728af7b211238fb3c36ad2d92341eb61a5046d8e51d5357e3dd297b94 +size 726810 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..22de0cbcbee8d593dbd11487eac85677832da974 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_16c5142e54049280b3a6+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68389367bb30af25de76a4c417e2d0c446b2562faa97b2cbb36aa2a9256dae0d +size 71302144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff index 8ff9dfa5a7929ebd7e63a323398357d43ebda770..9a477e038f9b6ea6d4bf9188e55078e16af947f7 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff index 3411b5f76572e024903885e3785b60a1b2996948..f41469fa4846500e58c93a0f91e0378adccd5d60 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff index 137f7aefca057f6cdc8ab30a940692b686b0c1c9..0b4f3ef045c8016b4411b2e4ecefe1e5f9ba6796 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff index d4184bf8ec4d04b6998376672a9fd6bcd6761298..a49900fc4292a24723644242cd6794f2b5d438b4 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff index 212a78b93aa12c4a995d513bdad26cba068f2fa5..344de9682a4822f42630ebaceaa64e8f27650d55 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff index 90c16d2d82105a5033f5769e166a947259b651a1..b40000564525115ecdefaa3f31e6070686afdff3 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff index 88f4a339395c1855e3a3a7b9a420f7919395acef..e5134a6e29c14e98d4a080f4ce5d38be177ffeeb 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff index c53a7e8a00b9fd77fccd7d2e94d9c48dc1e0e33c..25bbb0873c9a27c0917edd6c72b615a8b268bfec 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff index 82ae3bef85a9186f25c1ee327b3fb6d515468b4f..a21f561ebc32de1ae411b6c4fdfb973c9dc8f7aa 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff index 82ea7379f23dbc635fc56e8d028bfec62c113849..9fe8c9d716b48ce50e10f1ae5c7087005e4744b5 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff index 322f95001695ed228539b71c184e5d006fcfe5a8..51b3daad5b086be65293b550cf6815cb63964da6 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..254e0ad688d163a671b1b57f5e44e86b76ceba7f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63fca0f24dc0f69649248d5eac0c2f0cb77dde6f88efae6af8e063e9d3d06da +size 85024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..67b2192140936fa3ea765ac4f42a52768d890c16 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5beab2ee0ae183b40189a51d8ddd3b6bb09b599e6c594b717a8405394246ee94 +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4741e31cb840a44fc80c37d6db20d9238290d581 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23ed678a33869816b749+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:190c8d1353e804bcd4c147b8c311dc08fef8d163541d05d980fd3a4e84492ae3 +size 285717 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff index cb774ac19fb5e24a89fef396c14936a9f678fa88..7b19e3814cc60dd7d1121ce04e90d05aef4be493 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff index 3ee21e7ef37271718f32a882dbf05c76415f8a30..c36daec827547ed6fc12b06d414a8586fa9d60fd 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff index e5d395ab2f62d00eb8e5e6de25c625f6e2bdf696..0485d4f3cbbf48d61cc895055cd5d41764dc95f3 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff index ece539f71c43f80bed2e5201d708d0ad4b1edaff..2d54a8409bed4906157f44d75cec03c166411f1a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff index a25b15479e3565460631f1659dcf59a509844288..f86e412238292c3c1d649d47ee49c5a1ddc075c3 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff index 2047255edcb84049d62ca3bdd740cdcd5ca2da3c..47b714e9f79a93bc54fe6bd87d9d0a1706eb3369 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff index d7dde7d0a7b238774c010b38c3c45c4ac78e3fb7..96af30f29c4f7672936553433c845c66cbde95a1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1b8352e1ee9da452fd713c1a756c86482df2737e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e635e3fd90e62f2f22ffdf53f78d7bdd4fa50499755a35b844ef028afd5c49ab +size 593552 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..33373d98dfeefdcd769488f3d0d2cef77d01549e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b4eb85450c913723ea39d49391887a1c6eef478507a012bc26df42eaae1627 +size 3769344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff index 93d902266d6b443a4e64a26920bdf53da63deee7..502a4a265d3b3c89e5fe0b0be05912efcdfc6726 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ae85b21d79e4d0a352738ffcd1a9f0d1b3203603 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea714b78ca32bea049c7686793fd46a9880937a987670f765b3c354ec5a279c8 +size 1314630 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8b394e571ef2162e369c56049d62059d1419fd3e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ae5bed47f240a5992ee+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74d3faa4a6fe1bb3a0f02b1bbc81b98fc32edf8a77e0bf3719d745f166ea182a +size 728064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..62ef4ff58a28daa199c972d47f339241fd8db5a3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc92b67af798d370c93f9f7fa40a9eea03f3760c3bb41af5259f27bd5b0c4263 +size 961456 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2c0a7dd181362c7795a34c1cbea25275d4e3e95c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3b37aff244c1128985f5+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8476d85cba12830b772ba3f157a569bb0c2cb2bbfd65c678b90251ba6f06ef5 +size 14654464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..593ffc256fb45e4fcad82dbfb1fc705eddb0220d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1208e3c6aa65b9ac41ef1f27ba61dbe0e3386f5121455d737b379be31fcef5ed +size 982549 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d722f0b2c894a8d68d25262e1a96476322a847d3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3c3275e8a9f242f022a5+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4982c2d39eebf629ec91e57f9a85da471cc907955a7ef0ae4523edeb97e47d82 +size 22774784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff index 03a830613d8722e9bb44e7d0027da75cbb94e314..ecf19094854c06491c1af77904973dba9e21002e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff index c481630f4d6b96f97156326fd540aba61f350739..1d24bed6ee119748a02799ac5126f574d059e2fd 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff index 973f1183351546f284b33635dbf87a5b1e5b1a7f..33e3e606cbdd0a8df1014b93ea2b4fbb529abdbb 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..54aef1b9ff33e67fc81764152d7dc18fd2c3257e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e85d3e2a3ebe4acab2722835a65a1e08488dc52ee22d8f328beef73f4d6686a +size 83504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fe935810cb0fb5bfe53619b1be12293ee9117c03 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4183d5be6001030603fd+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e59b6d51d41360d52fb68989fa190a7fb0d389f7376caa101bfb5012b5bc7c1 +size 328704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff index fa3d03aa070254dde5fcb1a3f18ff75a2c9e1bc9..99505e4d4a583806e034895f623b30d11ca8e5c5 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ae7e0c6790082c43cd145aae8cc7e3cc89e15ca8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/speculation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..98b365756a1477f30021f530c30076cd0f9b3cc1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41cbbba5e64669f88f166e6e00ee0e0e5e2e400e85216c2a34abd2f7cf6b4d57 +size 423848 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..984c8cc114b3d9daae4196e5ef658ea8e86f64f8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4386b9df6b20120d67ef+677eeb9d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac85e4ea5ce014b40fc0e2ef545793aedd869c02f834be33ff3b51e6f9112252 +size 3687424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff index ae0c09e5c3302c5e344b188a9646e18a0b6d9574..2cf7def006c6ac448f3c389110115b1c5aeabc5b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff index 3eb867076e96834daf75152b46833c60743404db..8021df47ae2515044048849bd5a70634c372a4b2 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff index d4e898a8639d5b8b4dd4d6778fb7f6127016ceb8..410d354b366e074dacd9282d9d8559e39ea8e660 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff index 5b21ae829009c7bcaf0a16c9d363deb9bbb7faab..3aef95d1f8c9f6df14b6390c27e76f818260a253 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff index c22c11b7c93e850340259d3cea3c0e89424327fc..e10706cd00c53b6580bda08d038315e4dec34225 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff index 4f2afa442e7273fa7b6bc57057dfebfd07a801c1..98f2b86a9f64c922460d3857a724d97605ce7e78 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2f60cf407a844ffc3b617b274672033d23cdcbe9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3962d9250fe3f1360028766a5e7fb9dc13fe2e38b3d3e3361a28b89ab70ae97e +size 1600143 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..23e8dbbb0c970c82298fc93dbc29e72683e2e820 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8890435cf077d9ee7a7ff775c731b1bf22ea226bcce465abf3871406308bd83 +size 779264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..329f79eddff4064edf82c34086d1d4804b9de80c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5242a3d79ad5bfe6c6dc+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff2768dfa4f76bd4e6d676a0ac12fe4d4660a025f44d310091d049aee9dbec1f +size 787074 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff index 791a3a08e32e10509b9aa5b929d8467c61582a50..a5b36ab15b744de339943d8833e877700aaf268f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..02b1420de36744df3a46a42a41cb0b4e2947dbb5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2143e28e1d0b82695d02278df3d2442182f6e68a4fd9fd84588b6ee3a4ad4f47 +size 739558 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f8fd35a27647aa1d5b4587626465e6bbdb0d0ead --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5426f921e14368bcf904+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b413cede89c12b746351746eca8d15edb0957e6056e2962a77eac01c47585cb +size 26133504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff index 02e23a70e0737f81d2b9fada82007fecc73b4197..ed1b684a77f9e62f7678e73646cd6df01eb69d26 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff index 7d39b65031533c577ab2aa940cb75fe7c80b4a50..bc04af8751cc35ea797d6058f658c3d121f1ed61 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fecf633f4f292be1efc7d5e861deb2775708dbec --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:413e24966ec219391e013f4338cbab4c3cd1cebf49080679fa1dc73f26a51196 +size 699875 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6a2eab235b95f8739ec984e0181a53ab11ac1ac7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb478847dd694f7ad0a83746a6410c240f0f462c807be9ac4197cd6e22395740 +size 12411904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..89944170f339c9a2f63698f8a85a2a910c272149 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a2ed470eb752b572dc5+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:831569ad3797f3f05f180da2720fe57b55a1be452113afc66b7e4ee63b90a894 +size 12558920 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d77cc4623d37cd6d571e53b5665e0d5210d7d626 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c3ecd0053332b2176e119af22c32ebfcbabafcb6767c90dfc5d5e4773dc52ee +size 1088079 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..60c0b8e16111cb8eadae1efe1aec09c2a8f68d84 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d98308e45ddd448ddbc8a54dfab20d673adb007ac2926d05dc406c8b88377fa0 +size 5008384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3724400eb0dba1470ec262d286969d4842df2db3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a68e035d2f868ab14d1+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96e00ca7c5c487cc4686abfa7e8f7ea115e132067590de051fbba1fb9db8d0ad +size 5192563 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3d7d85cd3d975162683053faa369dcc6259604af --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..69703c783a5c5784dc6800dc7651dc774d0f568f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2f13c85b13166643ce5e8a1a9ea9c9229e7ee61e288a3e0042e7067e4dff041 +size 72553 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..551c4f22ffdd6cbb1cdc2f9cbbb1b7ea64d2cf1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae584337d3ac24c64d0ac46f5bed3bf0cc822cc12059f4677303ba14b016c09 +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cb3057c8dc43d0cd9a8c1df803e132f1e9fbb8d3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a95b2e154e7d24b16d2+80d05c3f/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40a6e4eddb87d4eff1e83c77a564fc725b53c5725796e5ca9c3c3b03ba8a8661 +size 289434 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..25316c74213e0ec7de77c3d6cbcaaaf2e429ae0c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47919894c59c7495d63e02ec2f2397683b3c455d68d4289feebcfdbc8b14464 +size 707389 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bc349a461e896af931c95bcddfef04231820cb51 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5b8742701edea50e9b339241213eb9085d48bcdcd93cf3617bebb4be8188ba7 +size 8920064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a3a9450b1bc848b6a1701608580453ee13cd60c7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5c3f594ed06ee38231d9+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44c9cff6f5d47fc8f0ba776e3ea5e9665990cf3e7bd2ddaa4dbe27537b55e1c3 +size 9067080 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b2c172bca0e22ba61551f79fd63de8c84653966d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00199479d642f8e41b2d2d86073cc56342f8db6f1f5b0802ab804a154a158ed1 +size 596952 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..590c9e31fcc80fa91fc27d478aeaf1028d1bdfd3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a2d51dbe91269fb6a6ccf842ec026c043b5cfbc28290a846f888fb033fb31be +size 1936384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c91880f57e45c8d88c153a353bb25261b5b13637 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ec8c8cf0f5bf89c9d6b+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7fff4843d478f5275137ffc3309b2291909f7f5ef2aae82cd2765d10071ce3c +size 2092575 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff index 7097405725ea16b83e1601bd30e180c64e42a343..f00504082a5055f36c2230d1b05e0a6931e522d1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff index 8efd4e2fc837530df5d57c1ae1c7d134649547b5..5b9363c608b51f9d9245a4ed0d9aa4ed3325c7a7 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff index 7505d3e78faa8d0b6ce98078cd0a5ff9b1fab28b..033cb4e1311a2a2272a6dd3297dea58c1dba52ce 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bf0117c17d50244b7176c2a9924040b83a37180c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f140ea6362f69c0f98cfd36d894ca317980049d7f20a960c905db8b3d5c80ac3 +size 80789 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..94db693717eb1fadc21893938e754aa58c62aa46 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8edc1cf916a402a5b8dab3c6b57541b78116ba0eef5bdf1b4db0c6b78ecbf359 +size 287744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6ca5b027e33ad7997c546c26a96a04839186ef4a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6436324b782a1664e4df+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa4cc246328ddd7b3c536dca9db723562c52cce7121d36a443b29727efd7342 +size 295957 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff index 7b90370962f0108841ea2d55ac2b00c38259b7a1..2670a7435343053578eac2074f73b54965653c6e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff index ba78cbd6fd9dd9b462dff75bbbdceeb6fdbfaa2e..b95d1080cf5ccc2c1e85ef791c45c0b7c02068d3 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4bfaa2419bb7160e01db42545fdb46d246731fa5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf024c0dd89be8a97c7f8563689252d82e0c79df1b16a5d406547dd73045aaae +size 843918 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3feb97413d88d656506d60444ef3ad7cc6beaf84 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_682d61505f84109d4ee8+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:069a9f7e8b5a964a42ea2a63eeb49d5f91bc7ad3c719ceca000a856c2285143f +size 96881664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..abf3dc95c0f758c351b8985f3473a795add9bd02 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29f438087c8183891b0a906633385de9ff9254877beef03c4ae441492f5c570 +size 923054 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8ac3aa6fc96647edaf30019c7c3e309e1825eaf6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68dcd52f0ccb32584504+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a0679a4c53faef93dac15fbb16bfa557f58e1defa538f12df17745ea36b18b +size 5223424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6887fe6d72c31e33dca16aea68e9f1d508572cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635244b46a2c44c27abc6001ec1aa8b610e50a18d6cbaeb483b6ffba745cadb6 +size 81516 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a1087283e6b03440319df546aca6a7bb93b08471 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f75af6a30babfcdd019+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:586ba202665cba80c198053d13e47976ce4f8a15c28fbdf553586e8c56d70aa8 +size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff index 3f0ec73b6b732a095416efafeec4435958b6fff5..2fcbc3feae079d7ba02d3294602def553bee821b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7e55e18b2f5c701330b31813ae65b76203b86773 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bad5da259ac757173f7632c251ab0a98c2f7905b17492fc3e6ee4c02dd415530 +size 442335 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..629f0cc4095b9dbf178bd05fd0aaa161ac47de2d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_728ea12fa65b10279163+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5143a233f5450b98f13d5eba28b8a0f12a0fe007e70cc2c54b73907a051c088 +size 41114624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff index ba8aaa950ef8e1a1b2cb214ac897aa5fc22935c9..72fcb672d67c5f2fb4273cf3af5c03f7ebc474d8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff index 5ad6584e46f0eb6feddac0f0a0277acc0f84b8d8..3dbb870c74da00a6f90f508ed35d300758a6ef14 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4575e5771ddc55a140255ed21ad65aebe31e6eae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8809741b21f28e30c3ccaadee74571c52a65eb476f56a6caf7ab4cbf36f8b4b8 +size 84000 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5d78f5ca4d0553781982a47180bfdd0931e25568 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69ac8b7a62e7155bd2ca7be897cff2618bee0343b8c32f782c69e12f1fd5d01f +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3756314f3884389977f4e265c2ecf01e5bb94e05 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7adc42e2d34fe96734ff+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76252e81bb93cf4557f500c4f92365f350fa5f3660cfb7c34277689cec253446 +size 285717 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff index 8defb786bab47631752104d5bae950ad80d4e305..f13d463b1b8dd811447f2285abfdf5b8a974715d 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff index 1c0cf6d6d38017503bf8848ac0aeb03421b4b8f2..43d98273e07ae65d6db40ad27faaf6c8601acf4a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff index 6a3c343258e492654ede10e4050289cd41d81a11..3dd686436af4922ffebff9ac081caa7cf02ad807 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..27846074fdcb82c3a04bdc991b67f7ccd47fe2fa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:167419b0c75579c95b0be519a753531e8e82602d6f6a8c9083f6bb6333a74b83 +size 90382 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1afb17d91c0e1afb68b53bdeead95eaca516f70a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_833bb585fe5b0b130f37+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4f609a202d6d782c9d3e5e708f58d7466b43c85229ee127407686f8dc27297 +size 359424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff index ecf1a099e701224e5584e36161fd51212abeb757..70a04f5ffcc163f6e4341e90f0934ed325cd1f93 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff index 8d620448021b1b2192e929c2504f17dbbc3b7acf..9cc19a46375e5af9094257dbf64fde442c874985 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3347381d7f73469556eb0bd3035a41c30b8e1f13 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_00144433-aff3-419d-97b5-2c78406668c2/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..90882b8af4a600d78af852e50dfc2e1e3d751ea9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58367242601db0a7ebb13ca60b9cf79864afa341beca682a60fe80d8366e1858 +size 103424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4c5df4dd27ac108a6c91922dad99740a8de5827e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8bfac9bd88b54f53f679+9ee9f232/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19701dc15d2f8af8827d670c613d2d8dbdd3895994e98db6898443157b85f3ae +size 104320 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a6adc8e21ca678546dd3249064e046ddd75eb4a5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d845214d2a88cba3b82d220ba69115b804dd26fbbffef6cf554e026f84ea4e37 +size 474402 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2915df74188b328a1283d02507d2403c8e957883 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7d4fd225396fab4b590404c73a227127c02036daf80be8661b844df8b48c43 +size 42322944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..605f7321db33b4ce8f6263beafd7686242c0eb5c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c4ea37c2af2900459a2fcd8aab12aa49178c6ec8ade5dfbf5cb8237c523cd5 +size 1061093 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..df321e152a24cc3816b074d2aeac9bec7b76331d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_90390e7549d92b97c345+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6e45a60f784ba7390794b6f205163176c6ca03da0759d70392511c589f92c48 +size 9473024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff index 31106f81f69e7327e37842a457f545d8d7024981..7aa3132f45f99a978c1e3fda3def0fc30f72e5db 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff index 0d48baae2c595dc74ec94e90b5710f6dd938117a..4590be123828546d77d4f2da506385f7e1024ec4 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff index d3e418ec1d2ba34271dab68e8ef05ee0122b02b4..d04594631116f52cf0ef825f4e87bb72ffd93a59 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..83d61eee63d1dcc16dafff5a1ad4f8225b7041ad --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d29649331c20c587922d5d0e2fb8d1e288e2142a64bf6feb2e0c04564651180 +size 758053 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c6316f5419f34fd342f220abe6c1634ef14b6fca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cecf5d714b53ef2932f727d5185a85bbc78ac68d1b93d17561782db7a84a9e8b +size 2417664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d7a9d65dca5f94de4c6458a1ee659867b27e9173 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_954404769badd1a9f817+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a250f3ee7e3279f384498e9d3238735700f721908e42b6dadfff1ed4b3c8239 +size 2554832 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff index f0cf45e67b137d9bfd7b8234fa0d29cd30caf17a..57ffed90ae69d6caa8fcc8cb627d370ce3bf69a7 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9a0e905d5e65f1efb152f143fe4d78385db0f8c0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ed3a7af30ff497bf3aff279caf7ebba01abe920a8f32fc1d43ad70a6d42d3f +size 93425 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ba49d2e282677540fd6a44d99cba66f1ff3d3da4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:debf2f445f2d91b3e81f6c561957e6da701f8d65a0b024362f96572ae01d281f +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3d89fcd47820fb14993805d6c3ba0a229b231cb1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_982078e41304be19b706+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02efe5da4c68467d9cc360f98fec2ae13a5d32bf0348dd70078941ef54c2ea20 +size 288898 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..512b26a58c3747500d438fbb832acfe7ba98c423 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b5f6110349d5d41b2c9ccc368115493ad6bc3586827be71a6661f797804a85 +size 854266 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aff7f727669ce28818815adbf0551fac735838ed --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9f9007db4d17e6304d37+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffcfb99f9f1ba36ec425c2705ea0468686b78c8a9784066e20cb831aed4f5e7e +size 12248064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb9da9938faa96ca43962c24ac9c114777c0dd4a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd08578a4e4e68ff261cc258981b4fea2a3be84d1f5c87f5287e9c016bb03871 +size 1050679 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fba945c99c3f55a1a404dda7aabd3fc9431ea2b4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a0f36ad51d6ea0dcdcbb+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dfad88dded44abf213ec8d96d06594c888361b6e60c3df2af7cf59eb0b3ad5d +size 5827584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9a71f3cafcc02b7809993c0a0dc65b1007008247 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_84883d79-230b-461d-84ee-35897394c1eb/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e2473c54cffac949ecf31d3b6ec636c290545b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a29d4cf27aec9af90e53324eff9ee69e033679482f4016a9dd3e5416c7c68a +size 1584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..49c14aa244d8bb0d70ffcae6277ef428be498c9a Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_a7b46ab4ec38ef92688d+4e709972/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5775cb312c77a27a50fd184619cc77a58509f3e2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c9345a8bb967edd52fbc50aa196abd7bc0bf57d13ed08d31bad18a9d508c054 +size 392910 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..908f8a1116410e4d7ac2787b1281d665c2d01925 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a8fc2fc4fb9affdb3f34+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048cb04306b4c3bf3ef1cf3f5a1c6ee5e5acccd561f2a1820a6dd7af61fe6d68 +size 41626624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..84311613ed1b5d8453c9d3951ffaeaad7a2a0a77 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb77460869c0ddc335ba996275900c00d7feb24c4a5a259af95cd10edae234ad +size 82772 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d175eb3c646edb2e59901e0077c028cd7c465dc6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aa0c40e8632cc47ef550+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cdb1cc112745c708003f5471e792e2e337020deedf0687423bb22d0162cd810 +size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..01472ce06fb10557f5aae47626a0cff562a63339 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35606044a5fb08adc447e978de1dd6c6b48b4d4286d15e4055bb2d582116b292 +size 643371 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..81383be2dd8db4898cc335e47a302e9d2a8b24a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0f0db943bae305a68c80774a9fe26ed9f803348b9759982bd1bb945756f1787 +size 1721344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6154ef26974565b5fa8ad87c95bf7fa4a4c337ff --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bcbfbafeff377b2f5566+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c890850f31c43d590c8728eaa8097b04b52f04486c7c1b77efb67e947a11b328 +size 1877650 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..d95a1fc80ea959a56b877a27309be4f4e35ede36 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_3061d177-3787-4858-9ef3-97e56db7c0ac/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07bf62460bc7813e4a95f208598e2fa4ae551d41 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76dc525043c8d0fc6b46382b17659e6a2f2e981788c635021a8a61aa832d2866 +size 14480 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ed689d8bf6d3f684d483aeb5ef5c42413c1c6eda --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfad33bbd82e22e822e16832143f7608c9384c3df0a7261f99c348f49a075353 +size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..09226b511c3993dea2933d289defe2ed9533b863 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bcd6676adef3c1367da6+12918b03/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f10f6410ef8ab053c3f321a7a2cea7fbd4f547211f3298bb06e58c4b4d214ee6 +size 272962 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..58fbf6b9366d9e28d7f19321e85acd4cd96c0614 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bd6c5dc0d2fdfe20be8427817645687f659b8f36 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc7dc2328bcd74f31a28521edc69225b93c50143678e31d5fa49d480da8e4645 +size 97794 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..db9a61ec2754323ee32c25390ece0d6f2a5a3004 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d12c75d4a55a1f29a67c+283df001/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c90cf80f09287116aefd1d77ef7ea52528f0ddd2e6b1fd9d7ea8464b77928ec9 +size 410624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..57fac9aae75b84f85dff08aa7209d0def24bcc06 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4a6e09f14b38a1ac74e900d600fe09e5ca3fbe1c2132c4f8e292ced22453a6 +size 865291 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..16be20755c09e6658c686efec098d0e462377968 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:790df002849d6ca576755ce6f33bc65f62ebffbfaaabd6164af828ee153409f8 +size 4967424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a2230a3e1a03aa90eaa1d481ba003021804970fa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b12c256e167655df02+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd1389c27337f4d53fb2847871053a05393caa08bacc75278a0f872ef98107f +size 5133948 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3a7e49f61e264a21a84d781cbb6a7f55b25f6b19 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_0dbcbab5-d098-4947-a495-48abc1ccfe44/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a11cc062167102b0f4faa9fdf16388d72d9e0015 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4590b82eb167fd46963abdbb717cc18cb9511961a3393d100eeacdbdc45b0243 +size 11280 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..131272c11a995b4f26074cfe40e859202493c35c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61f192ff24a6578c4f0526978ab11de474af2cff137fc0cf5781a960b40ca3c2 +size 1444864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..717687dea1af5e63ae5d3df5544e009ad1eebbf8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e2558041b24e343829b6+33adeabe/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e772121d8759f5be6c5ccbac212abbe0edf2df283d23a07ad99e8dcbe0c178c +size 1447845 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..a7a40c022faf4beb0fb20246d9c1f9c8f32c6900 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b77cb4877256561d4ed203b0301061289d02c959 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef754d16b3e82aee5107d0a7fd0c7161d2e147bdd92f45baa8257134ee653eb6 +size 91833 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7a0c43b2ca4915f9b75e39938d61c10ebb577826 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:361e59022b7e709b6c5f88b5a15fbcaa9c77a1824f66879aaf0fbc1f83b74c5e +size 369664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a032e79974b30a8d1ebdd01e48e5aea203f16cf5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e43fb6fc70d6ca48c81c+2dde74c7/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:911e9e986d71d6cc8d3868d91fd2f58501b68798b80fc7b9d515c2fd315d9ed8 +size 379225 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..27153f3e8773fa579a76b0e688b45e700c2714c0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d422cf1312c637124b44630251154b29d45c4dbbaa41c4d6bc53af9e4677223f +size 388429 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..81de4040e3f6a68fa065b4e7af59849774bff35c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f933585f9a633e59fe4ece9f5f4d9a86f1a6ce289abd0180f2b8e397f50abf3a +size 1936384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..50d0e1490978a392fe41d64a53f201b81dd5e518 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eab0a664432ada643aa8+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7490b7c71770dfd6efd79673a1cc0bdfd30a313ef7493dc2237e344a10bfcbf3 +size 2029297 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0814bdc1e3967a98799db4860176c3bd521880f4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb19a3b824c46ca786a7406154552f2c93b89609625040b8582ba81f9326e096 +size 872065 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f16fb9a24463d53b7b503c4f53ee9fc7b2a71c3b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa50c225710a6829ef2f0e3c1e4324f8ec72674cc6928d1201423de1b51b482 +size 6759424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2466ad6f8964bb2d0026456d6977f291ce8d6fc9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eb36f973e5afaf932d84+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aabe48e3b479d5931aa6b6c2270733e726c6f54838c700f1d11c8ed518aad531 +size 6925948 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f3db015845cbd0881016290d2f786a9405536138 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a42a79766caa96504c73e6408df9337f56988798c2f055dd5db56b9ac22473c +size 79552 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..943ec48654d1345336c67b5fd7f214073b0a1bdf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ebab4b006fd93fdc9f5b+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0edf3b1cca13e3cfd49cf7a31cb94f254064ff9e1a748bd217ea31bbfec3fad3 +size 338944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2ad69cd01383a682715fed6c7faaf92a72533ca6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1c53d56b8553bcbd02baa7d92d8df9bfa2cde69b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd7b32223098a02834ecaddf6796f7ce5368e71063766f568fcac25c4afe050 +size 84807 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c90070311b5351f9aa3cc411fff388a737667724 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ecf84edc7dafa7cf47bc+6e4949b4/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:970168724305facf6604877b2fe3a6275a7ad171b1ae0a53b3738ea82da78efc +size 646144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3370cbdef7ca31160808eea0d21fa3aeee228eea --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6413e5c53d8be274da5e0ec85af6c4b98681a6fc93d84f9308e07ced6e3a28d +size 678755 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e42943a9aa5761e0063c752f78f219025644a583 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad4b7231e48802923ab8f7ffeb5c0e66bcd3320d2d528dc86c4e630ab744949b +size 533504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..379a488d7dc359166086b987559d6b4a350a6789 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f0c0b96c282628d572b0+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b3cff5f086187bf53e95651f652ae25c6a0ce5ebccfb1807f3ac19b025bd90c +size 553005 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..087adad3be38716000a37f13a671422143669eca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a82b3beb86fc5ad8db17a90281a3d8d1e499f2379ca89b897d6d0517a715c4 +size 590808 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c53b37c243c2e2e87fedebc86ac409e92b780fac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cea0388cd178001a3724c6909483f831b5496536d7d5245004e83e0325813e8 +size 1547264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a71e583c35f5951d981d3f5d4eab05db50f4a6cd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66c6ae50bebda5bffa3afbf578373dab25341fb18ecc79c6db934e6fe781e183 +size 1703455 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..35bcb6257ea5d63372aa7b4bb2e56c95b1ec6e40 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71587335a52795b548e7cd718e26c5dd0dce406549983999a534d5a34836ef5 +size 879627 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3feb93381330ae7a39f73eb4675270e9f0d89fb6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a86d7e4f0ee17fa765fa7f96c834982bcc574850fdb55b69fb75b5757a43cb0 +size 5397504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f4b912ed4e55cb84ba8c82d82019ff75bb695a1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f20c2d825c9eea3ef8d8+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e442898a2c1f52e28a38a855473717df57346ad761f98d52d21cd68887c55980 +size 5564028 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..47dcb45325cdcd5c905ef7d473dc19f3f07326c2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_a67e3594-5821-43fc-82e7-b53b8cbf3d04/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8496781848c02ecd308b5b2c5a3c342b702425e7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044 +size 8979 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..19b019d0793a674978fb4530cfa3f7960b6daacd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a544c06ae28da7be0e286615f95034a5ae59d6d1b27a36c327840817b4951d +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a63e33720567e34511cb15e0d7b4425bf0617738 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f582554dc6735b2845ac+8dcc0e6d/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68f9f89b595866751f213ca920f991732fe95c8d6e3e372a8b7c5151a696d61 +size 249608 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..47dc969631220fdb40e5fe10f7bfae7a5454d613 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:018a62e9a6d7b944baff314113a3d60b4bee472c3df99216573823afea25c4f2 +size 426072 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..65a06112ea68459fced3d0a73293c3aefdfbc2e1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f58a7a6cc789d9f2165ec475ddc50ef9f89395d51b4256b51f75c59dce6328f +size 2100224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3283fd4111606d49c63a95c6723ea6d4a588131e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec5296968659b367ab031f01c6a4ab6d38a8c39de43cc95a321ccfba4fbcf3da +size 2174311 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7600234efd490c9f80fe1a110d53180f09d06240 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_cb833190-5529-405f-968f-b8770c61955f/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6084ce6d845ff91b01c0cbfd758d24019f38ef9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99355de7043422bc03a8df795afecf586999ec744407bc4fbffe63f002ba2df +size 1931 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..36c5b85ea26f19cea3494d6e361e75f2817c2f54 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca7996d498757ae3c966a7c1c67e704b54800fdd90cafaf6aa3092ad2821286e +size 134144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..80abd7061ffa0a3df4151628ab08e14a049f6d52 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ff4e930aab914278b21f+9b5c93c1/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:309adf7a45209115890f2eb5a56e8587a897d3880b82ea35a99da27cdf2c6cb3 +size 136222