dacorvo HF Staff committed on
Commit
7e6e7ae
·
verified ·
1 Parent(s): 2ace5df

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +87 -0
  2. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/3403115c880863fe4065.json +63 -0
  3. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/5d6508f3c305508a9e35.json +63 -0
  4. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/fcb5fb70d7e694e229e7.json +63 -0
  5. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/ibm-granite/granite-3.1-2b-instruct/7bd52b717d35cfb4df0d.json +59 -0
  6. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/ibm-granite/granite-3.1-2b-instruct/c7cc8db735ece7aeba17.json +59 -0
  7. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/0ed08478601ada771841.json +64 -0
  8. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/4b20db2f875ddad665e5.json +63 -0
  9. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/69ab326570dcf8778e4c.json +64 -0
  10. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/8a48ba4681cd05481cfe.json +64 -0
  11. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/f92daef82938443934d0.json +64 -0
  12. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/45bac7b83a13be6ba9d0.json +63 -0
  13. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/bd06e66d087fee3632bb.json +63 -0
  14. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/microsoft/Phi-3.5-mini-instruct/639c2df611ecdbd507cd.json +164 -0
  15. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen2/Qwen/Qwen2.5-0.5B/2c2550e69bd7b8401151.json +83 -0
  16. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen2/Qwen/Qwen2.5-0.5B/39d0494022f8af95d63a.json +83 -0
  17. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-0.6B/a65675cac42f945a53ae.json +88 -0
  18. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/3717bfc5b3b2064b6cab.json +134 -0
  19. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/6460e8bdaaec6d3aaf4f.json +135 -0
  20. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/b3f860566b83df7cb655.json +135 -0
  21. neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/compile_flags.json +1 -0
  22. neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.done +0 -0
  23. neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb +3 -0
  24. neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff +3 -0
  25. neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/compile_flags.json +1 -0
  26. neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.done +0 -0
  27. neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.hlo_module.pb +3 -0
  28. neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.neff +3 -0
  29. neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/wrapped_neff.hlo +3 -0
  30. neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/compile_flags.json +1 -0
  31. neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.done +0 -0
  32. neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.hlo_module.pb +3 -0
  33. neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.neff +3 -0
  34. neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/wrapped_neff.hlo +3 -0
  35. neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/compile_flags.json +1 -0
  36. neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.done +0 -0
  37. neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.hlo_module.pb +3 -0
  38. neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.neff +3 -0
  39. neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/wrapped_neff.hlo +3 -0
  40. neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.neff +1 -1
  41. neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/compile_flags.json +1 -0
  42. neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/model.done +0 -0
  43. neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/model.hlo_module.pb +3 -0
  44. neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/model.neff +3 -0
  45. neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/wrapped_neff.hlo +3 -0
  46. neuronxcc-2.21.33363.0+82129205/MODULE_210575e529dec63f9202+24129607/compile_flags.json +1 -0
  47. neuronxcc-2.21.33363.0+82129205/MODULE_210575e529dec63f9202+24129607/model.done +0 -0
  48. neuronxcc-2.21.33363.0+82129205/MODULE_210575e529dec63f9202+24129607/model.hlo_module.pb +3 -0
  49. neuronxcc-2.21.33363.0+82129205/MODULE_210575e529dec63f9202+24129607/model.neff +3 -0
  50. neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/compile_flags.json +1 -0
.gitattributes CHANGED
@@ -14691,3 +14691,90 @@ neuronxcc-2.21.33363.0+82129205/MODULE_a60ee6952450ce742f72+f00715ce/model.neff
14691
  neuronxcc-2.21.33363.0+82129205/MODULE_a60ee6952450ce742f72+f00715ce/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14692
  neuronxcc-2.21.33363.0+82129205/MODULE_e0dc711f77a602f2be1f+25971132/model.neff filter=lfs diff=lfs merge=lfs -text
14693
  neuronxcc-2.21.33363.0+82129205/MODULE_e0dc711f77a602f2be1f+25971132/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14691
  neuronxcc-2.21.33363.0+82129205/MODULE_a60ee6952450ce742f72+f00715ce/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14692
  neuronxcc-2.21.33363.0+82129205/MODULE_e0dc711f77a602f2be1f+25971132/model.neff filter=lfs diff=lfs merge=lfs -text
14693
  neuronxcc-2.21.33363.0+82129205/MODULE_e0dc711f77a602f2be1f+25971132/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14694
+ neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff filter=lfs diff=lfs merge=lfs -text
14695
+ neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14696
+ neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14697
+ neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14698
+ neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14699
+ neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14700
+ neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14701
+ neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14702
+ neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14703
+ neuronxcc-2.21.33363.0+82129205/MODULE_210575e529dec63f9202+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14704
+ neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14705
+ neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14706
+ neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14707
+ neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14708
+ neuronxcc-2.21.33363.0+82129205/MODULE_30826b478ad3db0c312e+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14709
+ neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14710
+ neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14711
+ neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14712
+ neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14713
+ neuronxcc-2.21.33363.0+82129205/MODULE_42601fbe748660a77be0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14714
+ neuronxcc-2.21.33363.0+82129205/MODULE_4ac5ed244908e0b23eb7+677eeb9d/model.neff filter=lfs diff=lfs merge=lfs -text
14715
+ neuronxcc-2.21.33363.0+82129205/MODULE_4cfdf541f39ee43e568e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14716
+ neuronxcc-2.21.33363.0+82129205/MODULE_4cfdf541f39ee43e568e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14717
+ neuronxcc-2.21.33363.0+82129205/MODULE_4e5d28a9a05b8ba71697+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14718
+ neuronxcc-2.21.33363.0+82129205/MODULE_4e5d28a9a05b8ba71697+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14719
+ neuronxcc-2.21.33363.0+82129205/MODULE_4ec35bb57fbe1fdb6c78+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14720
+ neuronxcc-2.21.33363.0+82129205/MODULE_5299d075a10cd88b75ef+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14721
+ neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14722
+ neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14723
+ neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14724
+ neuronxcc-2.21.33363.0+82129205/MODULE_60946426f567b03182de+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14725
+ neuronxcc-2.21.33363.0+82129205/MODULE_69c3f39b7e46f645ac72+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14726
+ neuronxcc-2.21.33363.0+82129205/MODULE_69c3f39b7e46f645ac72+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14727
+ neuronxcc-2.21.33363.0+82129205/MODULE_740c88e5b7c7bd2f9bae+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14728
+ neuronxcc-2.21.33363.0+82129205/MODULE_740c88e5b7c7bd2f9bae+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14729
+ neuronxcc-2.21.33363.0+82129205/MODULE_78a467fd5a1d9f7adeef+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14730
+ neuronxcc-2.21.33363.0+82129205/MODULE_80b2eb092f15a0509d79+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14731
+ neuronxcc-2.21.33363.0+82129205/MODULE_80b2eb092f15a0509d79+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14732
+ neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.neff filter=lfs diff=lfs merge=lfs -text
14733
+ neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14734
+ neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14735
+ neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14736
+ neuronxcc-2.21.33363.0+82129205/MODULE_8c6b204d1bccf64885d0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14737
+ neuronxcc-2.21.33363.0+82129205/MODULE_8e336cee3fb428dcae05+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14738
+ neuronxcc-2.21.33363.0+82129205/MODULE_906d1a9a7651a30426b8+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14739
+ neuronxcc-2.21.33363.0+82129205/MODULE_906d1a9a7651a30426b8+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14740
+ neuronxcc-2.21.33363.0+82129205/MODULE_9bc06d024b9ce47e5f56+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14741
+ neuronxcc-2.21.33363.0+82129205/MODULE_9c53a5ac284b649e893b+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14742
+ neuronxcc-2.21.33363.0+82129205/MODULE_9c53a5ac284b649e893b+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14743
+ neuronxcc-2.21.33363.0+82129205/MODULE_9d52760610a001af812a+f8e6d902/model.neff filter=lfs diff=lfs merge=lfs -text
14744
+ neuronxcc-2.21.33363.0+82129205/MODULE_a1dbfcc237b9198dc735+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14745
+ neuronxcc-2.21.33363.0+82129205/MODULE_ad0618d39f30ab05071a+677eeb9d/model.neff filter=lfs diff=lfs merge=lfs -text
14746
+ neuronxcc-2.21.33363.0+82129205/MODULE_b6a329fd6d4912085786+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14747
+ neuronxcc-2.21.33363.0+82129205/MODULE_b6a329fd6d4912085786+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14748
+ neuronxcc-2.21.33363.0+82129205/MODULE_b860b7ba935f627d151d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14749
+ neuronxcc-2.21.33363.0+82129205/MODULE_b860b7ba935f627d151d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14750
+ neuronxcc-2.21.33363.0+82129205/MODULE_bee928e1ff2bd1f5e3e6+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14751
+ neuronxcc-2.21.33363.0+82129205/MODULE_bf62cea1b964ce1cb653+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14752
+ neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14753
+ neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14754
+ neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14755
+ neuronxcc-2.21.33363.0+82129205/MODULE_c9349ccef5ee7c09a049+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14756
+ neuronxcc-2.21.33363.0+82129205/MODULE_c9349ccef5ee7c09a049+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14757
+ neuronxcc-2.21.33363.0+82129205/MODULE_cb05698d60b06f387ed6+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14758
+ neuronxcc-2.21.33363.0+82129205/MODULE_cb05698d60b06f387ed6+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14759
+ neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14760
+ neuronxcc-2.21.33363.0+82129205/MODULE_cf7ffe281543e5e4c3d3+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14761
+ neuronxcc-2.21.33363.0+82129205/MODULE_cf7ffe281543e5e4c3d3+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14762
+ neuronxcc-2.21.33363.0+82129205/MODULE_d107c2d38c7104284da7+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14763
+ neuronxcc-2.21.33363.0+82129205/MODULE_d107c2d38c7104284da7+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14764
+ neuronxcc-2.21.33363.0+82129205/MODULE_d1e075f89e8272c07272+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14765
+ neuronxcc-2.21.33363.0+82129205/MODULE_d407bccc563987df7700+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14766
+ neuronxcc-2.21.33363.0+82129205/MODULE_d407bccc563987df7700+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14767
+ neuronxcc-2.21.33363.0+82129205/MODULE_d9f17410cfb469b24055+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14768
+ neuronxcc-2.21.33363.0+82129205/MODULE_d9f17410cfb469b24055+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14769
+ neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.neff filter=lfs diff=lfs merge=lfs -text
14770
+ neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14771
+ neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14772
+ neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14773
+ neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14774
+ neuronxcc-2.21.33363.0+82129205/MODULE_ef557fc37471d6bfd06d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
14775
+ neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14776
+ neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14777
+ neuronxcc-2.21.33363.0+82129205/MODULE_fd476e174a39b55eeb18+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14778
+ neuronxcc-2.21.33363.0+82129205/MODULE_fd476e174a39b55eeb18+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
14779
+ neuronxcc-2.21.33363.0+82129205/MODULE_fff86602d73f8f8ecb89+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
14780
+ neuronxcc-2.21.33363.0+82129205/MODULE_fff86602d73f8f8ecb89+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/3403115c880863fe4065.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 4,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
25
+ "continuous_batching": true,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 4,
31
+ "max_context_length": 4096,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 4096,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.3.dev3",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 4096,
40
+ "speculation_length": 0,
41
+ "start_rank_id": 0,
42
+ "target": "trn1",
43
+ "torch_dtype": "bfloat16",
44
+ "tp_degree": 2
45
+ },
46
+ "num_attention_heads": 32,
47
+ "num_hidden_layers": 16,
48
+ "num_key_value_heads": 8,
49
+ "pretraining_tp": 1,
50
+ "rms_norm_eps": 1e-05,
51
+ "rope_scaling": {
52
+ "factor": 32.0,
53
+ "high_freq_factor": 4.0,
54
+ "low_freq_factor": 1.0,
55
+ "original_max_position_embeddings": 8192,
56
+ "rope_type": "llama3"
57
+ },
58
+ "rope_theta": 500000.0,
59
+ "tie_word_embeddings": true,
60
+ "unsloth_fixed": true,
61
+ "use_cache": true,
62
+ "vocab_size": 128256
63
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/5d6508f3c305508a9e35.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": null,
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": false,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 4096,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 4096,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": false,
36
+ "optimum_neuron_version": "0.4.3.dev3",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 4096,
40
+ "speculation_length": 5,
41
+ "start_rank_id": 0,
42
+ "target": "trn1",
43
+ "torch_dtype": "bfloat16",
44
+ "tp_degree": 2
45
+ },
46
+ "num_attention_heads": 32,
47
+ "num_hidden_layers": 16,
48
+ "num_key_value_heads": 8,
49
+ "pretraining_tp": 1,
50
+ "rms_norm_eps": 1e-05,
51
+ "rope_scaling": {
52
+ "factor": 32.0,
53
+ "high_freq_factor": 4.0,
54
+ "low_freq_factor": 1.0,
55
+ "original_max_position_embeddings": 8192,
56
+ "rope_type": "llama3"
57
+ },
58
+ "rope_theta": 500000.0,
59
+ "tie_word_embeddings": true,
60
+ "unsloth_fixed": true,
61
+ "use_cache": true,
62
+ "vocab_size": 128256
63
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/fcb5fb70d7e694e229e7.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": null,
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": false,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 4096,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 4096,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": false,
36
+ "optimum_neuron_version": "0.4.3.dev3",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 4096,
40
+ "speculation_length": 0,
41
+ "start_rank_id": 0,
42
+ "target": "trn1",
43
+ "torch_dtype": "bfloat16",
44
+ "tp_degree": 2
45
+ },
46
+ "num_attention_heads": 32,
47
+ "num_hidden_layers": 16,
48
+ "num_key_value_heads": 8,
49
+ "pretraining_tp": 1,
50
+ "rms_norm_eps": 1e-05,
51
+ "rope_scaling": {
52
+ "factor": 32.0,
53
+ "high_freq_factor": 4.0,
54
+ "low_freq_factor": 1.0,
55
+ "original_max_position_embeddings": 8192,
56
+ "rope_type": "llama3"
57
+ },
58
+ "rope_theta": 500000.0,
59
+ "tie_word_embeddings": true,
60
+ "unsloth_fixed": true,
61
+ "use_cache": true,
62
+ "vocab_size": 128256
63
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/ibm-granite/granite-3.1-2b-instruct/7bd52b717d35cfb4df0d.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "dtype": "bfloat16",
12
+ "embedding_multiplier": 12.0,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 2048,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 8192,
17
+ "logits_scaling": 8.0,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "granite",
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "batch_size": 4,
24
+ "capacity_factor": null,
25
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
26
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
27
+ "continuous_batching": true,
28
+ "ep_degree": 1,
29
+ "fused_qkv": true,
30
+ "glu_mlp": true,
31
+ "local_ranks_size": 2,
32
+ "max_batch_size": 4,
33
+ "max_context_length": 4096,
34
+ "max_topk": 256,
35
+ "n_active_tokens": 4096,
36
+ "neuronxcc_version": "2.21.33363.0+82129205",
37
+ "on_device_sampling": true,
38
+ "optimum_neuron_version": "0.4.4.dev1",
39
+ "output_logits": false,
40
+ "pp_degree": 1,
41
+ "sequence_length": 4096,
42
+ "sequence_parallel_enabled": false,
43
+ "speculation_length": 0,
44
+ "start_rank_id": 0,
45
+ "target": "trn1",
46
+ "torch_dtype": "bfloat16",
47
+ "tp_degree": 2
48
+ },
49
+ "num_attention_heads": 32,
50
+ "num_hidden_layers": 40,
51
+ "num_key_value_heads": 8,
52
+ "residual_multiplier": 0.22,
53
+ "rms_norm_eps": 1e-05,
54
+ "rope_scaling": null,
55
+ "rope_theta": 5000000.0,
56
+ "tie_word_embeddings": true,
57
+ "use_cache": true,
58
+ "vocab_size": 49155
59
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/ibm-granite/granite-3.1-2b-instruct/c7cc8db735ece7aeba17.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "dtype": "bfloat16",
12
+ "embedding_multiplier": 12.0,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 2048,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 8192,
17
+ "logits_scaling": 8.0,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "granite",
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "batch_size": 1,
24
+ "capacity_factor": null,
25
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
26
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
27
+ "continuous_batching": false,
28
+ "ep_degree": 1,
29
+ "fused_qkv": true,
30
+ "glu_mlp": true,
31
+ "local_ranks_size": 2,
32
+ "max_batch_size": 1,
33
+ "max_context_length": 8192,
34
+ "max_topk": 256,
35
+ "n_active_tokens": 8192,
36
+ "neuronxcc_version": "2.21.33363.0+82129205",
37
+ "on_device_sampling": false,
38
+ "optimum_neuron_version": "0.4.4.dev1",
39
+ "output_logits": false,
40
+ "pp_degree": 1,
41
+ "sequence_length": 8192,
42
+ "sequence_parallel_enabled": true,
43
+ "speculation_length": 0,
44
+ "start_rank_id": 0,
45
+ "target": "trn1",
46
+ "torch_dtype": "bfloat16",
47
+ "tp_degree": 2
48
+ },
49
+ "num_attention_heads": 32,
50
+ "num_hidden_layers": 40,
51
+ "num_key_value_heads": 8,
52
+ "residual_multiplier": 0.22,
53
+ "rms_norm_eps": 1e-05,
54
+ "rope_scaling": null,
55
+ "rope_theta": 5000000.0,
56
+ "tie_word_embeddings": true,
57
+ "use_cache": true,
58
+ "vocab_size": 49155
59
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/0ed08478601ada771841.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": null,
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": false,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 4096,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 4096,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": false,
36
+ "optimum_neuron_version": "0.4.4.dev1",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 4096,
40
+ "sequence_parallel_enabled": false,
41
+ "speculation_length": 5,
42
+ "start_rank_id": 0,
43
+ "target": "trn1",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 32.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": true,
61
+ "unsloth_fixed": true,
62
+ "use_cache": true,
63
+ "vocab_size": 128256
64
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/4b20db2f875ddad665e5.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 8192,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 8192,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.4.dev1",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 8192,
40
+ "speculation_length": 0,
41
+ "start_rank_id": 0,
42
+ "target": "trn1",
43
+ "torch_dtype": "bfloat16",
44
+ "tp_degree": 2
45
+ },
46
+ "num_attention_heads": 32,
47
+ "num_hidden_layers": 16,
48
+ "num_key_value_heads": 8,
49
+ "pretraining_tp": 1,
50
+ "rms_norm_eps": 1e-05,
51
+ "rope_scaling": {
52
+ "factor": 32.0,
53
+ "high_freq_factor": 4.0,
54
+ "low_freq_factor": 1.0,
55
+ "original_max_position_embeddings": 8192,
56
+ "rope_type": "llama3"
57
+ },
58
+ "rope_theta": 500000.0,
59
+ "tie_word_embeddings": true,
60
+ "unsloth_fixed": true,
61
+ "use_cache": true,
62
+ "vocab_size": 128256
63
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/69ab326570dcf8778e4c.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": null,
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": false,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 4096,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 4096,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": false,
36
+ "optimum_neuron_version": "0.4.4.dev1",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 4096,
40
+ "sequence_parallel_enabled": false,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn1",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 32.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": true,
61
+ "unsloth_fixed": true,
62
+ "use_cache": true,
63
+ "vocab_size": 128256
64
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/8a48ba4681cd05481cfe.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 8192,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 8192,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": false,
36
+ "optimum_neuron_version": "0.4.4.dev1",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 8192,
40
+ "sequence_parallel_enabled": true,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn1",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 32.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": true,
61
+ "unsloth_fixed": true,
62
+ "use_cache": true,
63
+ "vocab_size": 128256
64
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/f92daef82938443934d0.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 4,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
25
+ "continuous_batching": true,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 4,
31
+ "max_context_length": 4096,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 4096,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.4.dev1",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 4096,
40
+ "sequence_parallel_enabled": false,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn1",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 32.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": true,
61
+ "unsloth_fixed": true,
62
+ "use_cache": true,
63
+ "vocab_size": 128256
64
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/45bac7b83a13be6ba9d0.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3-mini-4k-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "dtype": "bfloat16",
15
+ "embd_pdrop": 0.0,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 3072,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 8192,
20
+ "max_position_embeddings": 4096,
21
+ "model_type": "phi3",
22
+ "neuron": {
23
+ "_serialized_key": "NxDNeuronConfig",
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct",
27
+ "checkpoint_revision": "f39ac1d28e925b323eae81227eaba4464caced4e",
28
+ "continuous_batching": true,
29
+ "ep_degree": 1,
30
+ "fused_qkv": true,
31
+ "glu_mlp": true,
32
+ "local_ranks_size": 2,
33
+ "max_batch_size": 4,
34
+ "max_context_length": 4096,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 4096,
37
+ "neuronxcc_version": "2.21.33363.0+82129205",
38
+ "on_device_sampling": true,
39
+ "optimum_neuron_version": "0.4.4.dev1",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "sequence_length": 4096,
43
+ "sequence_parallel_enabled": false,
44
+ "speculation_length": 0,
45
+ "start_rank_id": 0,
46
+ "target": "trn1",
47
+ "torch_dtype": "bfloat16",
48
+ "tp_degree": 2
49
+ },
50
+ "num_attention_heads": 32,
51
+ "num_hidden_layers": 32,
52
+ "num_key_value_heads": 32,
53
+ "original_max_position_embeddings": 4096,
54
+ "partial_rotary_factor": 1.0,
55
+ "resid_pdrop": 0.0,
56
+ "rms_norm_eps": 1e-05,
57
+ "rope_scaling": null,
58
+ "rope_theta": 10000.0,
59
+ "sliding_window": 2047,
60
+ "tie_word_embeddings": false,
61
+ "use_cache": true,
62
+ "vocab_size": 32064
63
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/bd06e66d087fee3632bb.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3-mini-4k-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "dtype": "bfloat16",
15
+ "embd_pdrop": 0.0,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 3072,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 8192,
20
+ "max_position_embeddings": 4096,
21
+ "model_type": "phi3",
22
+ "neuron": {
23
+ "_serialized_key": "NxDNeuronConfig",
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct",
27
+ "checkpoint_revision": "f39ac1d28e925b323eae81227eaba4464caced4e",
28
+ "continuous_batching": false,
29
+ "ep_degree": 1,
30
+ "fused_qkv": true,
31
+ "glu_mlp": true,
32
+ "local_ranks_size": 2,
33
+ "max_batch_size": 1,
34
+ "max_context_length": 8192,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 8192,
37
+ "neuronxcc_version": "2.21.33363.0+82129205",
38
+ "on_device_sampling": false,
39
+ "optimum_neuron_version": "0.4.4.dev1",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "sequence_length": 8192,
43
+ "sequence_parallel_enabled": true,
44
+ "speculation_length": 0,
45
+ "start_rank_id": 0,
46
+ "target": "trn1",
47
+ "torch_dtype": "bfloat16",
48
+ "tp_degree": 2
49
+ },
50
+ "num_attention_heads": 32,
51
+ "num_hidden_layers": 32,
52
+ "num_key_value_heads": 32,
53
+ "original_max_position_embeddings": 4096,
54
+ "partial_rotary_factor": 1.0,
55
+ "resid_pdrop": 0.0,
56
+ "rms_norm_eps": 1e-05,
57
+ "rope_scaling": null,
58
+ "rope_theta": 10000.0,
59
+ "sliding_window": 2047,
60
+ "tie_word_embeddings": false,
61
+ "use_cache": true,
62
+ "vocab_size": 32064
63
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/microsoft/Phi-3.5-mini-instruct/639c2df611ecdbd507cd.json ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3.5-mini-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "dtype": "bfloat16",
15
+ "embd_pdrop": 0.0,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 3072,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 8192,
20
+ "max_position_embeddings": 131072,
21
+ "model_type": "phi3",
22
+ "neuron": {
23
+ "_serialized_key": "NxDNeuronConfig",
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "checkpoint_id": "microsoft/Phi-3.5-mini-instruct",
27
+ "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77",
28
+ "continuous_batching": false,
29
+ "ep_degree": 1,
30
+ "fused_qkv": true,
31
+ "glu_mlp": true,
32
+ "local_ranks_size": 2,
33
+ "max_batch_size": 1,
34
+ "max_context_length": 8192,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 8192,
37
+ "neuronxcc_version": "2.21.33363.0+82129205",
38
+ "on_device_sampling": true,
39
+ "optimum_neuron_version": "0.4.4.dev1",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "sequence_length": 8192,
43
+ "speculation_length": 0,
44
+ "start_rank_id": 0,
45
+ "target": "trn1",
46
+ "torch_dtype": "bfloat16",
47
+ "tp_degree": 2
48
+ },
49
+ "num_attention_heads": 32,
50
+ "num_hidden_layers": 32,
51
+ "num_key_value_heads": 32,
52
+ "original_max_position_embeddings": 4096,
53
+ "partial_rotary_factor": 1.0,
54
+ "resid_pdrop": 0.0,
55
+ "rms_norm_eps": 1e-05,
56
+ "rope_scaling": {
57
+ "long_factor": [
58
+ 1.0800000429153442,
59
+ 1.1100000143051147,
60
+ 1.1399999856948853,
61
+ 1.340000033378601,
62
+ 1.5899999141693115,
63
+ 1.600000023841858,
64
+ 1.6200000047683716,
65
+ 2.620000123977661,
66
+ 3.2300000190734863,
67
+ 3.2300000190734863,
68
+ 4.789999961853027,
69
+ 7.400000095367432,
70
+ 7.700000286102295,
71
+ 9.09000015258789,
72
+ 12.199999809265137,
73
+ 17.670000076293945,
74
+ 24.46000099182129,
75
+ 28.57000160217285,
76
+ 30.420001983642578,
77
+ 30.840002059936523,
78
+ 32.590003967285156,
79
+ 32.93000411987305,
80
+ 42.320003509521484,
81
+ 44.96000289916992,
82
+ 50.340003967285156,
83
+ 50.45000457763672,
84
+ 57.55000305175781,
85
+ 57.93000411987305,
86
+ 58.21000289916992,
87
+ 60.1400032043457,
88
+ 62.61000442504883,
89
+ 62.62000274658203,
90
+ 62.71000289916992,
91
+ 63.1400032043457,
92
+ 63.1400032043457,
93
+ 63.77000427246094,
94
+ 63.93000411987305,
95
+ 63.96000289916992,
96
+ 63.970001220703125,
97
+ 64.02999877929688,
98
+ 64.06999969482422,
99
+ 64.08000183105469,
100
+ 64.12000274658203,
101
+ 64.41000366210938,
102
+ 64.4800033569336,
103
+ 64.51000213623047,
104
+ 64.52999877929688,
105
+ 64.83999633789062
106
+ ],
107
+ "short_factor": [
108
+ 1.0,
109
+ 1.0199999809265137,
110
+ 1.0299999713897705,
111
+ 1.0299999713897705,
112
+ 1.0499999523162842,
113
+ 1.0499999523162842,
114
+ 1.0499999523162842,
115
+ 1.0499999523162842,
116
+ 1.0499999523162842,
117
+ 1.0699999332427979,
118
+ 1.0999999046325684,
119
+ 1.1099998950958252,
120
+ 1.1599998474121094,
121
+ 1.1599998474121094,
122
+ 1.1699998378753662,
123
+ 1.2899998426437378,
124
+ 1.339999794960022,
125
+ 1.679999828338623,
126
+ 1.7899998426437378,
127
+ 1.8199998140335083,
128
+ 1.8499997854232788,
129
+ 1.8799997568130493,
130
+ 1.9099997282028198,
131
+ 1.9399996995925903,
132
+ 1.9899996519088745,
133
+ 2.0199997425079346,
134
+ 2.0199997425079346,
135
+ 2.0199997425079346,
136
+ 2.0199997425079346,
137
+ 2.0199997425079346,
138
+ 2.0199997425079346,
139
+ 2.0299997329711914,
140
+ 2.0299997329711914,
141
+ 2.0299997329711914,
142
+ 2.0299997329711914,
143
+ 2.0299997329711914,
144
+ 2.0299997329711914,
145
+ 2.0299997329711914,
146
+ 2.0299997329711914,
147
+ 2.0299997329711914,
148
+ 2.0799996852874756,
149
+ 2.0899996757507324,
150
+ 2.189999580383301,
151
+ 2.2199995517730713,
152
+ 2.5899994373321533,
153
+ 2.729999542236328,
154
+ 2.749999523162842,
155
+ 2.8399994373321533
156
+ ],
157
+ "type": "longrope"
158
+ },
159
+ "rope_theta": 10000.0,
160
+ "sliding_window": 262144,
161
+ "tie_word_embeddings": false,
162
+ "use_cache": true,
163
+ "vocab_size": 32064
164
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen2/Qwen/Qwen2.5-0.5B/2c2550e69bd7b8401151.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "dtype": "bfloat16",
10
+ "hidden_act": "silu",
11
+ "hidden_size": 896,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4864,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention"
39
+ ],
40
+ "max_position_embeddings": 32768,
41
+ "max_window_layers": 24,
42
+ "model_type": "qwen2",
43
+ "neuron": {
44
+ "_serialized_key": "NxDNeuronConfig",
45
+ "batch_size": 1,
46
+ "capacity_factor": null,
47
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
48
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
49
+ "continuous_batching": false,
50
+ "ep_degree": 1,
51
+ "fused_qkv": false,
52
+ "glu_mlp": true,
53
+ "local_ranks_size": 2,
54
+ "max_batch_size": 1,
55
+ "max_context_length": 8192,
56
+ "max_topk": 256,
57
+ "n_active_tokens": 8192,
58
+ "neuronxcc_version": "2.21.33363.0+82129205",
59
+ "on_device_sampling": true,
60
+ "optimum_neuron_version": "0.4.4.dev1",
61
+ "output_logits": false,
62
+ "pp_degree": 1,
63
+ "sequence_length": 8192,
64
+ "sequence_parallel_enabled": false,
65
+ "speculation_length": 0,
66
+ "start_rank_id": 0,
67
+ "target": "trn1",
68
+ "torch_dtype": "bfloat16",
69
+ "tp_degree": 2
70
+ },
71
+ "num_attention_heads": 14,
72
+ "num_hidden_layers": 24,
73
+ "num_key_value_heads": 2,
74
+ "rms_norm_eps": 1e-06,
75
+ "rope_scaling": null,
76
+ "rope_theta": 1000000.0,
77
+ "sliding_window": null,
78
+ "tie_word_embeddings": true,
79
+ "use_cache": true,
80
+ "use_mrope": false,
81
+ "use_sliding_window": false,
82
+ "vocab_size": 151936
83
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen2/Qwen/Qwen2.5-0.5B/39d0494022f8af95d63a.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "dtype": "bfloat16",
10
+ "hidden_act": "silu",
11
+ "hidden_size": 896,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4864,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention"
39
+ ],
40
+ "max_position_embeddings": 32768,
41
+ "max_window_layers": 24,
42
+ "model_type": "qwen2",
43
+ "neuron": {
44
+ "_serialized_key": "NxDNeuronConfig",
45
+ "batch_size": 4,
46
+ "capacity_factor": null,
47
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
48
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
49
+ "continuous_batching": true,
50
+ "ep_degree": 1,
51
+ "fused_qkv": false,
52
+ "glu_mlp": true,
53
+ "local_ranks_size": 2,
54
+ "max_batch_size": 4,
55
+ "max_context_length": 4096,
56
+ "max_topk": 256,
57
+ "n_active_tokens": 4096,
58
+ "neuronxcc_version": "2.21.33363.0+82129205",
59
+ "on_device_sampling": false,
60
+ "optimum_neuron_version": "0.4.4.dev1",
61
+ "output_logits": false,
62
+ "pp_degree": 1,
63
+ "sequence_length": 4096,
64
+ "sequence_parallel_enabled": false,
65
+ "speculation_length": 0,
66
+ "start_rank_id": 0,
67
+ "target": "trn1",
68
+ "torch_dtype": "bfloat16",
69
+ "tp_degree": 2
70
+ },
71
+ "num_attention_heads": 14,
72
+ "num_hidden_layers": 24,
73
+ "num_key_value_heads": 2,
74
+ "rms_norm_eps": 1e-06,
75
+ "rope_scaling": null,
76
+ "rope_theta": 1000000.0,
77
+ "sliding_window": null,
78
+ "tie_word_embeddings": true,
79
+ "use_cache": true,
80
+ "use_mrope": false,
81
+ "use_sliding_window": false,
82
+ "vocab_size": 151936
83
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-0.6B/a65675cac42f945a53ae.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-0.6B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 40960,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 1,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-0.6B",
54
+ "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 1,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": true,
66
+ "optimum_neuron_version": "0.4.4.dev1",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "sequence_length": 8192,
70
+ "sequence_parallel_enabled": false,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn1",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 2
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151936
88
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/3717bfc5b3b2064b6cab.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "HuggingFaceTB/SmolLM3-3B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "SmolLM3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 11008,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention"
52
+ ],
53
+ "max_position_embeddings": 65536,
54
+ "max_window_layers": 28,
55
+ "mlp_bias": false,
56
+ "model_type": "smollm3",
57
+ "neuron": {
58
+ "_serialized_key": "NxDNeuronConfig",
59
+ "batch_size": 1,
60
+ "capacity_factor": null,
61
+ "checkpoint_id": "HuggingFaceTB/SmolLM3-3B",
62
+ "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1",
63
+ "continuous_batching": false,
64
+ "ep_degree": 1,
65
+ "fused_qkv": true,
66
+ "glu_mlp": true,
67
+ "local_ranks_size": 2,
68
+ "max_batch_size": 1,
69
+ "max_context_length": 8192,
70
+ "max_topk": 256,
71
+ "n_active_tokens": 8192,
72
+ "neuronxcc_version": "2.21.33363.0+82129205",
73
+ "on_device_sampling": true,
74
+ "optimum_neuron_version": "0.4.4.dev1",
75
+ "output_logits": false,
76
+ "pp_degree": 1,
77
+ "sequence_length": 8192,
78
+ "speculation_length": 0,
79
+ "start_rank_id": 0,
80
+ "target": "trn1",
81
+ "torch_dtype": "bfloat16",
82
+ "tp_degree": 2
83
+ },
84
+ "no_rope_layer_interval": 4,
85
+ "no_rope_layers": [
86
+ 1,
87
+ 1,
88
+ 1,
89
+ 0,
90
+ 1,
91
+ 1,
92
+ 1,
93
+ 0,
94
+ 1,
95
+ 1,
96
+ 1,
97
+ 0,
98
+ 1,
99
+ 1,
100
+ 1,
101
+ 0,
102
+ 1,
103
+ 1,
104
+ 1,
105
+ 0,
106
+ 1,
107
+ 1,
108
+ 1,
109
+ 0,
110
+ 1,
111
+ 1,
112
+ 1,
113
+ 0,
114
+ 1,
115
+ 1,
116
+ 1,
117
+ 0,
118
+ 1,
119
+ 1,
120
+ 1,
121
+ 0
122
+ ],
123
+ "num_attention_heads": 16,
124
+ "num_hidden_layers": 36,
125
+ "num_key_value_heads": 4,
126
+ "pretraining_tp": 2,
127
+ "rms_norm_eps": 1e-06,
128
+ "rope_scaling": null,
129
+ "rope_theta": 5000000.0,
130
+ "sliding_window": null,
131
+ "use_cache": false,
132
+ "use_sliding_window": false,
133
+ "vocab_size": 128256
134
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/6460e8bdaaec6d3aaf4f.json ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "HuggingFaceTB/SmolLM3-3B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "SmolLM3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 11008,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention"
52
+ ],
53
+ "max_position_embeddings": 65536,
54
+ "max_window_layers": 28,
55
+ "mlp_bias": false,
56
+ "model_type": "smollm3",
57
+ "neuron": {
58
+ "_serialized_key": "NxDNeuronConfig",
59
+ "batch_size": 4,
60
+ "capacity_factor": null,
61
+ "checkpoint_id": "HuggingFaceTB/SmolLM3-3B",
62
+ "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1",
63
+ "continuous_batching": true,
64
+ "ep_degree": 1,
65
+ "fused_qkv": true,
66
+ "glu_mlp": true,
67
+ "local_ranks_size": 2,
68
+ "max_batch_size": 4,
69
+ "max_context_length": 4096,
70
+ "max_topk": 256,
71
+ "n_active_tokens": 4096,
72
+ "neuronxcc_version": "2.21.33363.0+82129205",
73
+ "on_device_sampling": true,
74
+ "optimum_neuron_version": "0.4.4.dev1",
75
+ "output_logits": false,
76
+ "pp_degree": 1,
77
+ "sequence_length": 4096,
78
+ "sequence_parallel_enabled": false,
79
+ "speculation_length": 0,
80
+ "start_rank_id": 0,
81
+ "target": "trn1",
82
+ "torch_dtype": "bfloat16",
83
+ "tp_degree": 2
84
+ },
85
+ "no_rope_layer_interval": 4,
86
+ "no_rope_layers": [
87
+ 1,
88
+ 1,
89
+ 1,
90
+ 0,
91
+ 1,
92
+ 1,
93
+ 1,
94
+ 0,
95
+ 1,
96
+ 1,
97
+ 1,
98
+ 0,
99
+ 1,
100
+ 1,
101
+ 1,
102
+ 0,
103
+ 1,
104
+ 1,
105
+ 1,
106
+ 0,
107
+ 1,
108
+ 1,
109
+ 1,
110
+ 0,
111
+ 1,
112
+ 1,
113
+ 1,
114
+ 0,
115
+ 1,
116
+ 1,
117
+ 1,
118
+ 0,
119
+ 1,
120
+ 1,
121
+ 1,
122
+ 0
123
+ ],
124
+ "num_attention_heads": 16,
125
+ "num_hidden_layers": 36,
126
+ "num_key_value_heads": 4,
127
+ "pretraining_tp": 2,
128
+ "rms_norm_eps": 1e-06,
129
+ "rope_scaling": null,
130
+ "rope_theta": 5000000.0,
131
+ "sliding_window": null,
132
+ "use_cache": false,
133
+ "use_sliding_window": false,
134
+ "vocab_size": 128256
135
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/b3f860566b83df7cb655.json ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "HuggingFaceTB/SmolLM3-3B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "SmolLM3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 11008,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention"
52
+ ],
53
+ "max_position_embeddings": 65536,
54
+ "max_window_layers": 28,
55
+ "mlp_bias": false,
56
+ "model_type": "smollm3",
57
+ "neuron": {
58
+ "_serialized_key": "NxDNeuronConfig",
59
+ "batch_size": 1,
60
+ "capacity_factor": null,
61
+ "checkpoint_id": "HuggingFaceTB/SmolLM3-3B",
62
+ "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1",
63
+ "continuous_batching": false,
64
+ "ep_degree": 1,
65
+ "fused_qkv": true,
66
+ "glu_mlp": true,
67
+ "local_ranks_size": 2,
68
+ "max_batch_size": 1,
69
+ "max_context_length": 8192,
70
+ "max_topk": 256,
71
+ "n_active_tokens": 8192,
72
+ "neuronxcc_version": "2.21.33363.0+82129205",
73
+ "on_device_sampling": false,
74
+ "optimum_neuron_version": "0.4.4.dev1",
75
+ "output_logits": false,
76
+ "pp_degree": 1,
77
+ "sequence_length": 8192,
78
+ "sequence_parallel_enabled": true,
79
+ "speculation_length": 0,
80
+ "start_rank_id": 0,
81
+ "target": "trn1",
82
+ "torch_dtype": "bfloat16",
83
+ "tp_degree": 2
84
+ },
85
+ "no_rope_layer_interval": 4,
86
+ "no_rope_layers": [
87
+ 1,
88
+ 1,
89
+ 1,
90
+ 0,
91
+ 1,
92
+ 1,
93
+ 1,
94
+ 0,
95
+ 1,
96
+ 1,
97
+ 1,
98
+ 0,
99
+ 1,
100
+ 1,
101
+ 1,
102
+ 0,
103
+ 1,
104
+ 1,
105
+ 1,
106
+ 0,
107
+ 1,
108
+ 1,
109
+ 1,
110
+ 0,
111
+ 1,
112
+ 1,
113
+ 1,
114
+ 0,
115
+ 1,
116
+ 1,
117
+ 1,
118
+ 0,
119
+ 1,
120
+ 1,
121
+ 1,
122
+ 0
123
+ ],
124
+ "num_attention_heads": 16,
125
+ "num_hidden_layers": 36,
126
+ "num_key_value_heads": 4,
127
+ "pretraining_tp": 2,
128
+ "rms_norm_eps": 1e-06,
129
+ "rope_scaling": null,
130
+ "rope_theta": 5000000.0,
131
+ "sliding_window": null,
132
+ "use_cache": false,
133
+ "use_sliding_window": false,
134
+ "vocab_size": 128256
135
+ }
neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "-O1", "--lnc=1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--internal-enable-dge-levels=vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ef50e479badb0312e208336533316393fc797857eb5a2cde744753c266d7f3f
3
+ size 97794
neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671f2968ee09229cb96bb22be152df8bd197829d6ce7ad77ef854a3963b716b9
3
+ size 410624
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aeec7d218e22beefd303645f302d7a805df96146ecddac686eb936f8d682209
3
+ size 84114
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f7732c3ad2ac92d6eca8c6efd83baeefa27be9061021e79a069e2496d6faa00
3
+ size 246784
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9411f69d1d13a85dfcf64804e552e540bfc24470b7f2b52f419c5eceb8f6357
3
+ size 254967
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:948a51e7715bd8232b0ce82f8cca432d73c2a709e6a794d3a228525868c057f7
3
+ size 93425
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c30438f983884de1b30e5866ed44b55b755f839caf513e2312cbdfefc8eda3
3
+ size 277504
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbbb4dba625a46bae0587ff94ea66196c0ebfd2bfebd5b2d8680fa00fcf7292a
3
+ size 288898
neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6361b23631f22b3534873d4bc3fde1c4197b34dc2d5d84b8097108cc7faed45
3
+ size 596952
neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0be47707e53a60759826fc077df760c768f27f3699574a1ca42a2c1721ac66bc
3
+ size 1936384
neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e23e21a3d39349b0a3b066fd29eb841c6bb9dba1fde90c7a0792c5717638a18
3
+ size 2092575
neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c63a961d38d6e2faccfb3057e2a3a8ac96875fbd1ade15ba62f09333cfa1e1f0
3
  size 36148224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc4ee4b8b64d4dc6330fdd8fae2947adf4f88ba10b2505b8b51907e13863538d
3
  size 36148224
neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ca40769d5d3d00298600feafb4f2bdfe3f38f29001fa2e039b0abdca262b79f
3
+ size 707389
neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f4bd2263928878732290e6596fa1c0e4a7814eb0474a84d84f85f9b64adccd6
3
+ size 8920064
neuronxcc-2.21.33363.0+82129205/MODULE_1f6141b704e8b040fc5f+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8e3eedade64e6006b05c3cca3bd0dadb4d083a1537c460c92a2954778c957ca
3
+ size 9067080
neuronxcc-2.21.33363.0+82129205/MODULE_210575e529dec63f9202+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_210575e529dec63f9202+24129607/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_210575e529dec63f9202+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bc87b5ffad1211f61cfe408bd0f36ef1b88887fb434341f067d7f562301bbc1
3
+ size 474402
neuronxcc-2.21.33363.0+82129205/MODULE_210575e529dec63f9202+24129607/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cadedb7a2a926529dfddc0443cf969ffb2d353356781957c51094d99e2282717
3
+ size 115057664
neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]