diff --git a/.gitattributes b/.gitattributes index b33fc71c5a4db1290bbb1013b37b79888e310df1..35d048b54e5bd1746b3f6cb1d1a39f6ee3d0d204 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5708,3 +5708,32 @@ neuronxcc-2.21.33363.0+82129205/MODULE_ccc2d4abbb67d5b7286f+a02c3a36/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_ccc2d4abbb67d5b7286f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_77f1b8fbe51833738f1c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_7b28cd1e3dc6bc844fa3+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..107cebd2dc9223d7d557c2256bdd494131263beb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..85de4563a8a26b1586e5f1bc560873a827ce0d5e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaac8a0d1057bbcaaf5fc3dc85fa7699af59ddc07c1c26435640d6112acceff2 +size 378947 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..eadb87407b46ba1a83fd4d4f6ac8d1b7bc8aa8a7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f24564b3a426253a6c9a58a73a9dedf7df1bb3c4ae4f9c3029d426f107c9c7b4 +size 1926144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c9dfbc44d8ca8feac4fbca9301a4228500fce41f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7485e701ed5c6302ee1967ad86e7d57db68d08f66dca5344c6ee9783377ef51 +size 2000256 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..107cebd2dc9223d7d557c2256bdd494131263beb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6ffd5c27ec279bad5c57820a34203f50291ca6af --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c69e83428c93f3412168937e1df467dd69d10ef46e9576f1fcb96de906e1a86 +size 382812 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..76346e1992e6c459e78824649d4ed94ff63e13de --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76209c24a04f98926f0cfe3fb63de3d27ef082dcdb30d4fec9ee2de7c5f18d37 +size 3032064 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..69a404e3399cec1a7f6a2b2e0f9c53b58a4a07d0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca1eb5edafa963dab3105d66b6044e615664da9c73f0a1a998f7ea8ae9ba07b7 +size 3106169 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..107cebd2dc9223d7d557c2256bdd494131263beb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2be4392d71eb255773052f55d30aca80803ae9da --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d430b5a352b7fbee3742c299062adfd10f1232b12cd924c52e979764a23e3f8 +size 382348 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..50a21cbd45dec18d38125d1b40efd8cd10bfbe4a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e24de20088f1002f739a76dff410698f254aab56f4dbdd8cdd5e85d2770eaa7c +size 2049024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a2cf63bca4e60ff783a6ff491ffe6c8204584691 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f5ca74126a5794dacf54743b1bbfd785f5eb4f45e34cb5f0280e864eeb78a0a +size 2123112 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0a2078b3647f01aa3165babe7ccfb751b4575e61 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..563dbd41e22b2d8421294f7d016cf1ae0dcdf7ae --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e997c4e79e96cbcb32a125f0d039e049ef9db52977baa2ba2ff1c61cc44f8728 +size 472299 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b2095c9af54431f7d4f654d7ddfc9c86cfbf034d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6f412e048ddcb9a23fa9f92216ff3eeeb3eeeb78a0c4c5aada3c1962097b44 +size 8203264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0a2078b3647f01aa3165babe7ccfb751b4575e61 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1e0f3a0c35f1f9afe8e66526c13abe2371e65fdb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a62f6d9bc9df3e2de19505d67e63c54515ab460fea5371f2c82e9e8d3c67322b +size 467299 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3087ede30df6c7904780408e81c352cddd41179d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4934ab602c98cf48c3c0047ec8bdd1706da17f5f5e7e9542e04045911cfeb3 +size 4363264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0a2078b3647f01aa3165babe7ccfb751b4575e61 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3d35a5b7f9d6c9e9d2b2166ccd7c67a322118cc8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb0b4a58bc33a450116485bdc97c80db38b76b6cf227f1a396467a16f8a24120 +size 463666 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..021bc3b27b57510caa6aeaa257373abe8ebe838b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed1635305f59594bb96729057bfbd84fefd31db1795f3e1248314ef8e7b6e273 +size 3953664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..107cebd2dc9223d7d557c2256bdd494131263beb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..92935257f68c4868ee22289df0b02b9bc9184bf8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6e644ce4a647575cac6ee8a2b724e7eeddce2a15d81873dcb06658ee9b43c2 +size 374121 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1a4b0d304f46d736f7b099bd7182b9663b23389e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27888041c8321fa7821e9e2462fa24906b92a55403e477ad4270efcf29f80abf +size 1117184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f1be8b54518143719e0c3eb60828b4ac84901da1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f4497b6c14d756912c64bdf11a914fbeeea99ea086717893013947ddb15959 +size 1209802 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0a2078b3647f01aa3165babe7ccfb751b4575e61 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d043f2b3418e69ff03a62fd1d19a89b9ab1b52e6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a4ab156514485dbf5d74db2535e84136a07f17752c578320af5984844defbff +size 468446 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3a5516c40c36be97003df94e78b1c693d72651a1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6791c3341dd45d1cdf2724e98350fb7b1b08c8198db84fcd650b33c04117b0 +size 22252544 diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/0451dd4ad8e23b64aa0c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/0451dd4ad8e23b64aa0c.json new file mode 100644 index 0000000000000000000000000000000000000000..57b0c8ef0e89dfcf24119b497faf000c71083e49 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/0451dd4ad8e23b64aa0c.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/837dd017e8ae4530668c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/837dd017e8ae4530668c.json new file mode 100644 index 0000000000000000000000000000000000000000..eacd6923727a653d98d2647649dea9eca38f4d13 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/837dd017e8ae4530668c.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/a36d353642d325c5397f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/a36d353642d325c5397f.json new file mode 100644 index 0000000000000000000000000000000000000000..b6cb5c9878246185d850718290292e714c23ff33 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/a36d353642d325c5397f.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/cfdcd4c75495aba1f95d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/cfdcd4c75495aba1f95d.json new file mode 100644 index 0000000000000000000000000000000000000000..0be835ccb4574668bdcbad446e495e4ff967e8d9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/cfdcd4c75495aba1f95d.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/0fef0800e360b8e2aa85.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/0fef0800e360b8e2aa85.json new file mode 100644 index 0000000000000000000000000000000000000000..1176d2993637727e30b887f0d66cad7e9c14d8d6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/0fef0800e360b8e2aa85.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/3403115c880863fe4065.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/3403115c880863fe4065.json new file mode 100644 index 0000000000000000000000000000000000000000..e61b6b2f77a834566a6cd079527df0dda438f50e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/3403115c880863fe4065.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/98b3fa9a890ef0e1f391.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/98b3fa9a890ef0e1f391.json new file mode 100644 index 0000000000000000000000000000000000000000..7e96121ea85f2a51c8013ab3a71f96a7eb38d9d2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/98b3fa9a890ef0e1f391.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/a6f67c366d986ef4b636.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/a6f67c366d986ef4b636.json new file mode 100644 index 0000000000000000000000000000000000000000..b0c6f88746d3fa626bf974d2ae730cd1a72c8e0b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/a6f67c366d986ef4b636.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/c87e429d1d13a13279bd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/c87e429d1d13a13279bd.json new file mode 100644 index 0000000000000000000000000000000000000000..2aa821b22dab835c81f1b78ebb8532c9f9e7ea86 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/c87e429d1d13a13279bd.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/cfe0f6333cc973602102.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/cfe0f6333cc973602102.json new file mode 100644 index 0000000000000000000000000000000000000000..0f3c9e78e9674d89f6387f5aec6ecfbc8a933449 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/cfe0f6333cc973602102.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/e75d2005d39261b7d1d8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/e75d2005d39261b7d1d8.json new file mode 100644 index 0000000000000000000000000000000000000000..414e12ffe0e80f2374f0def3ca2478102205f4f7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/e75d2005d39261b7d1d8.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev3", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": true, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1c0c5bd4738b8c097795c218feb6443ef6ed63bc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e445156b47032a190d83fc2c046434ceebaf6955ca33f5225b018b68f9d2c21 +size 464761 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ec378fa6ca64ccf10e111d566464005796bcb14c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c2123c893919ee13fb7762a2640a6ec76d00052c62ea198a63100094cf2a075 +size 42363904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..71d8f28725a6fd36067e26814a4ebfe648ca205e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7e20ba12250d08b854a76456fa2e4e0d6ddf13691e025257a2e3778525e023 +size 867147 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3e106e70fad71397498cdf003f227ab6a0a0176e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57b2ee4243bd8fe7035f1301940240484b1b2a18187aa6368723739798ce86d1 +size 11684864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0976a343019ccf01ea1d9c1c819850344b771e59 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c58abb644d9362c99fa166606676a8dc86a64eb8676a59269c70083ad0e2ebad +size 504323 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dd1a644afcb2c296d9d634ad5d1edf8aa25f5642 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3ba0eb6e1df08cbffca30403208d7bc01a9dbf90456333ab4e02f7ff41d3a7 +size 42148864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e4b079abea0f82423ab2605e3d2e8fdcaeec1d47 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996734e9b61410c29a1dec6e20faca089cdc9e6f6d1ed6361c1cdc1ebf79de20 +size 439084 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4e7baaa4c5b1e285fb139a71985cf649d607a491 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdcb29853ebd5b5be6ccc9f14b429ed3911423c1b9ee98ee9e5e1491e989c8df +size 31325184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..14fdbbb33d6e5dc636e9b23afebc653c24205dcd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56098bbc4e1115bb073697e1c5a349c4e8bb83656efcf80c80ecc9d9cb4d5767 +size 965833 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5ab7f115f126ee090e328a9b38fd742814a9cad7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70cc500c4a4184db6b0b40edc013e749fbf1c78edfc6f513aae54d3dd9349282 +size 6687744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f6f790355c18a49e170acb8dfdc326047ee5b1bc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c677f4472d5cb6ee4fb280b2f7f0e9ec44ab34e3d2f22624181c1b9f68b9401 +size 749530 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c9c4413fc51145e4cecde458badaed3765a3785e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:838f56981e2283d5d2fc868f0288ab80d1b456db738cd422573fb27e75699297 +size 3124224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7ccf8a3794c0cd8be6bf5e5d96bf71e696de01f7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58a2383839877ae97d7e19f0f8758b8ce9e6bde442c0ac88fa32ea398a0ed7c5 +size 3272392 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1061888e375ff6c653817e77647513d0f85f4a51 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9618983cb3b58f43eba8b4defb4ef66b3f8bdd11d441c021d90d3507ba304ef5 +size 923499 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..182c2dd0f0c9c8b7ccb61f5cd10e94a7a517430e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf6f3e794ce64e8618eafbe02bfa901b4ca21963ddd8542ee648dd6755613a5 +size 11408384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dfcb34dedfbd344ffcdb8170137c2132b684f2d0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5bdaf7ab1c592348c87b32d7abd5f29334e7ba4607e47b500beded5fa611d2 +size 760831 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..de6aa0a026054cb3bc1a3832903adad67a422864 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312ef0522c097b8a8ea70483f3ba91f2faaaa1cb53fd0579b80af8ba3520ac59 +size 9165824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4f078d8d757930e2c29bbe021816285dccd977fe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca7519b76e0ce31948d91cc3a9cd78e334016664da913a256f822cc179fe0d71 +size 9313782 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eed523bea6eeaa784de41539eea2f866aeb3271f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac2983af5e71f668da961091ac3df341f2faebdcc5decdbf94e2a015b272ad8 +size 378995 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..874485ac17f12cdf2fecf1a47e7a0a6630e501c4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f7b39fec1e16b5f026adada7723d0bb6add3f42dec771d0a4f85ed419ee716 +size 1936384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3894d7eb038a41564885f5529badc4c6d7360bfc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741c44dc2f1de52d46c5aa9dadfb01cd7bb8b8cfd898a8c7604c596fb1149f93 +size 2010496 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b26925848f25079f899f904423b5514397351583 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79197135d214495e13bdbe0883fd3e9dde45a54149ba177c7d32773488375d0 +size 434848 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1cc99a40553298ef0dab7e056ec916b1671e0b7a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25dd3c84d4240f07300673a2919f10729a6e6ece2c1444ec397e737ce1d73ac +size 42179584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5520594aaa25ff8cc0e450c4d88f80a9136f2bec --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed141f95bc65aaa516c591933deb8e5be01c79c4a6efa2e311512186099619c +size 468573 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4b104238bb81ed6139ed2789d5af757e2682a05e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a34840aa752b8ac73e36fcadeddaded78e26f109cbc4b9f7125f902c7d3da820 +size 31048704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..779d6adad1e216cb8b1f10e339136e26a5a696a8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a5f7e7fb48516f7bede6624ee0b00b2dde29dc93d69a43c49418738ed77f7a2 +size 926239 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9c33a06391256ad23e7bb5534b1742e9175d3399 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d96120e3848fe6462a17a682fd74c0227d2b9a2a3e0769c25715d282b27e56d0 +size 6575104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c81c30e219fd168ba5fdb3df5e4fede33a859ced --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f4b1a20b9b5729a253582eeded6be8bd71404900097df80d61fa2eaca91aa9a +size 380126 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..91e6eb7390977bff5b84580f3e316b0a7f9e28ef --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d181f557e36d91e6bcebdb4191a973b4a25c0f5b4ec1c9203c7d904bd567591 +size 2765824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3eb56d86ce9869f96d486099259afbc86cb21cbd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d519613c8b28426f8dbf8d72ce20a247514bab75a7f1106b5c77b385fbb173cb +size 2840167