diff --git a/.gitattributes b/.gitattributes index 7459262f23adef4ee09f4b811591d06a6ad85350..582277e40a7cc3db86a159e34644e6ef61a6956e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -15491,3 +15491,20 @@ neuronxcc-2.21.33363.0+82129205/MODULE_17602083329285337032+bad9cf09/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_18241785500365933890+bad9cf09/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_8676111827325322899+bad9cf09/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_9676093068425877505+bad9cf09/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1/llama/unsloth/Llama-3.2-1B-Instruct/2f94cc50b1fe1187a15f.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1/llama/unsloth/Llama-3.2-1B-Instruct/2f94cc50b1fe1187a15f.json new file mode 100644 index 0000000000000000000000000000000000000000..4b2203a59dfc60a6949e2f28de19e692200dae26 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1/llama/unsloth/Llama-3.2-1B-Instruct/2f94cc50b1fe1187a15f.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..107cebd2dc9223d7d557c2256bdd494131263beb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..baac6c921671d93b569242aa35bfc7c773100bd2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:050e63f119bf3b1c7b360d1695e06cba77b8babce4ca5929c4d2a6505748c40c +size 379962 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cfd6dc5472fd8f0953060af29254fa905e5d23fb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d12b155f689bb3e299c8b38f5974a8db5ee77b046caaa391c9c10775fcfdf03 +size 2233344 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2520dfb130905146f8bfbdb925d598bd16420c9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0953a1715d59163d7839+617f6939/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f044fdc993c6c41c8ca0edb6f4a0e938e87dc56264da6f92195764c21c372290 +size 2326403 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0a2078b3647f01aa3165babe7ccfb751b4575e61 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8e168cad45dc567c11fd62445eab39391b6c7ddc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c030cf106862196561918698b00a1a8d3bd844ca252156e4e1866eeec7404a5d +size 474780 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fa950390724dd5459a3dfd55cc735f1da45f7e69 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4151fbbf8e0bf9b207+ad9e832d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21cd68b725d2b9fb19a0c95a4a7f7b37d6ad0b262fe043730355dd19f70e9a5d +size 10077184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0a2078b3647f01aa3165babe7ccfb751b4575e61 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..661ae822a624614b3dfa90b77cafff4152c17d70 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51141ad1492d98f0a5e85e1c5d001b7a8143e376efc09cfbe3f69b553e551a56 +size 440125 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d0c8530cee7ed647ba041a4613aa300ec0cf28ca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7840bb067b3b0dbbe82b+ad9e832d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43196fb735617a4afce0afb07f4f4c4292fdde0c284d8d991fe4660f675a138a +size 9544704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..107cebd2dc9223d7d557c2256bdd494131263beb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2c0e75213451450241a67f751b2cfe0b65e43fc5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09c324a26c6bf6553bfa1b479b2ce5f8b4a0cdc73ff0c57a302ef3becf9f1853 +size 383363 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2d470f0bb1edf16c121b194e260a5704b4535ba4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b6abef2e8972ae04728491572c1da70674ed11a269a59e019e38283f061d5a +size 2366464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bc6802b1755b554572098f3d95507e4d134bce18 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8840dff3517b081d95d5+617f6939/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:166e770e1041f4f19e0401fdec3ed86959f280ec686f552fb963d43144d48f79 +size 2459499 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0a2078b3647f01aa3165babe7ccfb751b4575e61 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..78fa1e9d05ad42229711b6f2375c962a1a260da9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40c708c5f1c2644af228bf4379cd7a61f407b20b0d85df3430c203e5538a2661 +size 471147 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8a103f210f47cb9f08d4d010f488e06d97e05928 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_99d403ff6364aa8b581b+ad9e832d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c3a9f90964e845fc120359c8ca5dbac32c8f01aaaf69fd94ee9288cb58a28f7 +size 9667584 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0a2078b3647f01aa3165babe7ccfb751b4575e61 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..032bc9c32827b84e3ce7b625b7012701b8e4782a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25c9ef27abf72510a98fd0750e8ecd6e7d6c24a22af8856a19ad8b2efb703ab1 +size 443756 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..64004a2a2d418e96187ff7040b4b2a3b7a846c58 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dc91206bd2eb240d90b3+ad9e832d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808859c75a8989f793f55b52401fb5af231a63ed6c657ea25d6115dfd1a42b0c +size 9944064 diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3657edd95e4d922f714c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3657edd95e4d922f714c.json new file mode 100644 index 0000000000000000000000000000000000000000..0073f34e11a12aab99e1cd53c79938c48206269a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3657edd95e4d922f714c.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/37f7f1639bd7680a860b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/37f7f1639bd7680a860b.json new file mode 100644 index 0000000000000000000000000000000000000000..11b54301cd40cbc46cc5ba472883716bedad0df3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/37f7f1639bd7680a860b.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B/99ab78a5cc863ad14ed8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B/99ab78a5cc863ad14ed8.json new file mode 100644 index 0000000000000000000000000000000000000000..b40649e4c0c51cd8dc2698cbbb73d89c32a6ca85 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B/99ab78a5cc863ad14ed8.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B", + "checkpoint_revision": "9535bd9b1d1dea6acafbdc4813b728796aeb28da", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_238f936f6792c3472f34+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_238f936f6792c3472f34+24129607/model.neff index a46fd007fe7a77c917cfa00ecbab17f20a45f0b8..496cae40d49bbfdead67802be382aceec19b8247 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_238f936f6792c3472f34+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_238f936f6792c3472f34+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:190342af9d8e6a793029bc8b25c6290c88fb688093ec56636fcb1eb651395871 +oid sha256:a3bc84c662b5fab8d927c1faeeb0dca751fd1f4048e013936a22011c85e8d154 size 14644224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f7513778fc8bf6a24fe1ad7a3e7403ce4aed228f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25d9f188c18c6026997c69ca5e5fb2b56c230547e4f77c84002047cf92a07fa2 +size 405967 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..708aa16b1c4a527dbe2dc65730cb0ebf284943ff --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b54af6ae513021b750620b7749e97b35e21ba91315de4e913c7b223a0a1c951 +size 3943424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d403cc1c90e8667685eac14c2ee664dc55f39967 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_29010310fe43264b7105+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883c07a022070e8d246ed578752ecdd2b9244e0da5fc79c9860383a8be444786 +size 4018637 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f8e2f6fd925e2dd9f86386a190a50697428945ca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fa60c9ce8de33b02366450a36e239b7a83bb2caef842b87525f856a0c51576c +size 471625 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6ab3e0f43f83b940399bbd12bcdc40d784f6fdce --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2fcf7c990c6fe04fecca+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74458bb058b81899ca02684781170f7b61422576d137cdb2a2c9b0216d751bc2 +size 114893824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cea489abd0280ed2ddff30b459393b656c96d57f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb7c75bcfa666317e73a581023c3ec1ed6008d09e5b1cb0aa466f441350b60d +size 434848 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b5636949e9e504182abb0d1746467ed53e53ebd3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68052a3ce6462c8b3a87+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ef43268b32d01d319eb39bf02e056d3caa82f7d742f44831f165634ec7a4538 +size 42179584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78c9e4beae8ae2b514a7+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_78c9e4beae8ae2b514a7+24129607/model.neff index 0a0729395e4048e87293b5b061107ab4e62dde3c..ee45e7d408f28fa9f47f6e7a75ff462c7a7c1d8d 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_78c9e4beae8ae2b514a7+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78c9e4beae8ae2b514a7+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87e73ef01588fef80e6c87110d1ba0b3a9ab156d3a5bea2935b6f97e8118ce99 +oid sha256:d06f75a1e4b52e3c6677ce2384b543882d8ea3cdb9cd281c78f12844ed9d25e5 size 5223424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9fe6e286c1c48b3cf2a9+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9fe6e286c1c48b3cf2a9+a02c3a36/model.neff index 0652d940f15efe0d7ac67c3aaddfd37636dff606..1517134a40e86ad51b03d06c7f03badf147e4106 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_9fe6e286c1c48b3cf2a9+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9fe6e286c1c48b3cf2a9+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ebfca33943e9704e99bfce6d117ab22e39439ad7d94d3eb6412585ee49cfcf2 +oid sha256:67a2afa1e975e6f17aff377c0d882e1d5607d1648ec03a9559820318b24b7de9 size 12411904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9fe6e286c1c48b3cf2a9+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_9fe6e286c1c48b3cf2a9+a02c3a36/wrapped_neff.hlo index c43a704271423bdc93e4a8a5eb80435ab0aba9dd..36ce1bad1dede89129b7473ae1478dc54122ac5a 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_9fe6e286c1c48b3cf2a9+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9fe6e286c1c48b3cf2a9+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de006ce50095b37b169e379fcd1bf1903c0e22ffe60711a8e858b04836e2ee85 +oid sha256:0dc8f9adfa9882a7212d3390f7feae5392fafc4121eaf1036b1e37b796bd56e6 size 12558920 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.neff index 77e864ed0e4d66df411cb76423dd652185d8629e..2a75726e1af4aa16315d5790f7e083d65f158a0d 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a6d912262b31e81edfe6+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0ebe7aa5e1ba33d7e2cb909b8df9ee8d25f1f1878a4dde47c74622a01bd3d37 +oid sha256:d6fcd0208f93d0034e8ba27de29cacfb822d98c06bc860349612fef53ad374a7 size 114013184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f32b137330f5cb99f5b590766286e9f0f9c9d33d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a8864820c647eccdedc2495fc67717a488fef6ea60d739bf955c2e0cd7dd0b4 +size 387415 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1ef7c5eb4e8b13435a7094c33cb1afd5622a4b9c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ff4c396c50e94b051961771d1d63573f803fb6a6c12eb90b943d32b9ceddf21 +size 1926144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e96e7d5832f815481e33a280b561d5f2e2ceecbc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_af4b3f9716f68f17b23b+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200039acaadeac5ccddc751a1b16f4d7160f8536b7f327c8ab703be672cdff69 +size 2000113 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8dc3a3f51943735ad3a49005bb53b43f8feb83e0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df2bfc224ba5b7eb401944f176848782435de31560cbc75c768739d851f495f4 +size 398951 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a8b2235828700caaee59c821bda3c4c284f930b8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bdaa2b3178472459a72d3432576eb34d265af9fda821e9a074f9646df85ebdb +size 2315264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d3129c5257de1dfc9bc82b740a88c3bafc982b8c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c5df6a2849dc2a262550+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d31a08e6e5048da612ee398da3da4d3607bca3b7b1f270e968d0290ffbc2b30 +size 2389212 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f6a3ff2ad752e338116c+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f6a3ff2ad752e338116c+a02c3a36/model.neff index b81c54d8bcc4591477034752d7845de160619d00..5b3969beed84049fb75e12a7b05389aef79353ef 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_f6a3ff2ad752e338116c+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f6a3ff2ad752e338116c+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a754f387eead2da41c87675a2ed90e16cd3dc29ed35cb115a7644b109f6cbd4e +oid sha256:9313704e927115bf13417b3e5a9ec30f8110a95faafa6e618a62a5a8bb57b41c size 5008384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f6a3ff2ad752e338116c+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f6a3ff2ad752e338116c+a02c3a36/wrapped_neff.hlo index 4560ce4f4211ac808366455bbf8898edcdec6526..395af3e1d08b9e7f450b0f62856ecd138ae7f709 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_f6a3ff2ad752e338116c+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f6a3ff2ad752e338116c+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b9606f9760075dbfd4e72b8d6405370b8f71e215200bba737285f8ae1487eb2d +oid sha256:751851802621b827355ab22caefb2171bebad4082053c0f31138e142b0491811 size 5192563 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3500b21a0dc2e925c4785a6c170b8b35a6376641 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d2418c35dd6460f001f08e963cf023c9ad398a31771a7d06fe5519d9096b4b8 +size 445265 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3c5ec28527315cd872a2e4d55d3fffb8f365849c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ff08200107e17dde7c8b+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5dade8293bc49afc1309c6579819e7ea6f510bfa30cc4e8d9ba957478846365 +size 217058304