Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +18 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/de98fc65f348a2d8c295.json +59 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/llamafactory/tiny-random-Llama-3/dad125ca8a11952d0a3f.json +63 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama4_text/tiny-random/llama-4/cf5cee8c9de5d6faf940.json +82 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/mixtral/dacorvo/Mixtral-tiny/3779219bc59a2ae3e045.json +59 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/yujiepan/phi-4-tiny-random/2ebdbeae8a00090a231f.json +60 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/e6f05187d051ac3c3e46.json +65 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/18af48e9de9305ddddd5.json +88 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/80594a5958040f8b1ebe.json +88 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/a5cf4ee087cd9567449f.json +66 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.hlo_module.pb +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.neff +3 -0
.gitattributes
CHANGED
|
@@ -5827,3 +5827,21 @@ neuronxcc-2.21.33363.0+82129205/MODULE_cb05698d60b06f387ed6+a02c3a36/wrapped_nef
|
|
| 5827 |
neuronxcc-2.21.33363.0+82129205/MODULE_4cfdf541f39ee43e568e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5828 |
neuronxcc-2.21.33363.0+82129205/MODULE_4cfdf541f39ee43e568e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5829 |
neuronxcc-2.21.33363.0+82129205/MODULE_8c6b204d1bccf64885d0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5827 |
neuronxcc-2.21.33363.0+82129205/MODULE_4cfdf541f39ee43e568e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5828 |
neuronxcc-2.21.33363.0+82129205/MODULE_4cfdf541f39ee43e568e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5829 |
neuronxcc-2.21.33363.0+82129205/MODULE_8c6b204d1bccf64885d0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5830 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5831 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5832 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5833 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5834 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5835 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5836 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5837 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5838 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5839 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5840 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5841 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5842 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5843 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5844 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5845 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5846 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5847 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/de98fc65f348a2d8c295.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"attention_multiplier": 1.0,
|
| 11 |
+
"dtype": "float32",
|
| 12 |
+
"embedding_multiplier": 1.0,
|
| 13 |
+
"hidden_act": "silu",
|
| 14 |
+
"hidden_size": 32,
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 64,
|
| 17 |
+
"logits_scaling": 1.0,
|
| 18 |
+
"max_position_embeddings": 2048,
|
| 19 |
+
"mlp_bias": false,
|
| 20 |
+
"model_type": "granite",
|
| 21 |
+
"neuron": {
|
| 22 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 23 |
+
"batch_size": 1,
|
| 24 |
+
"capacity_factor": null,
|
| 25 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 26 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"ep_degree": 1,
|
| 29 |
+
"fused_qkv": true,
|
| 30 |
+
"glu_mlp": true,
|
| 31 |
+
"local_ranks_size": 2,
|
| 32 |
+
"max_batch_size": 1,
|
| 33 |
+
"max_context_length": 1024,
|
| 34 |
+
"max_topk": 256,
|
| 35 |
+
"n_active_tokens": 1024,
|
| 36 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 37 |
+
"on_device_sampling": true,
|
| 38 |
+
"optimum_neuron_version": "0.4.4.dev1",
|
| 39 |
+
"output_logits": false,
|
| 40 |
+
"pp_degree": 1,
|
| 41 |
+
"sequence_length": 1024,
|
| 42 |
+
"sequence_parallel_enabled": false,
|
| 43 |
+
"speculation_length": 0,
|
| 44 |
+
"start_rank_id": 0,
|
| 45 |
+
"target": "trn1",
|
| 46 |
+
"torch_dtype": "float32",
|
| 47 |
+
"tp_degree": 2
|
| 48 |
+
},
|
| 49 |
+
"num_attention_heads": 4,
|
| 50 |
+
"num_hidden_layers": 2,
|
| 51 |
+
"num_key_value_heads": 4,
|
| 52 |
+
"residual_multiplier": 1.0,
|
| 53 |
+
"rms_norm_eps": 1e-06,
|
| 54 |
+
"rope_scaling": null,
|
| 55 |
+
"rope_theta": 10000.0,
|
| 56 |
+
"tie_word_embeddings": false,
|
| 57 |
+
"use_cache": true,
|
| 58 |
+
"vocab_size": 49152
|
| 59 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/llamafactory/tiny-random-Llama-3/dad125ca8a11952d0a3f.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "float16",
|
| 11 |
+
"head_dim": 4,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 24 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 1024,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 1024,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.4.dev1",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 1024,
|
| 40 |
+
"sequence_parallel_enabled": false,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "float16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 4,
|
| 48 |
+
"num_hidden_layers": 2,
|
| 49 |
+
"num_key_value_heads": 4,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 8.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": false,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama4_text/tiny-random/llama-4/cf5cee8c9de5d6faf940.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "tiny-random/llama-4",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_chunk_size": 128,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"attn_scale": 0.1,
|
| 9 |
+
"attn_temperature_tuning": 4,
|
| 10 |
+
"cache_implementation": "hybrid",
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"floor_scale": 8192,
|
| 13 |
+
"for_llm_compressor": false,
|
| 14 |
+
"head_dim": 32,
|
| 15 |
+
"hidden_act": "silu",
|
| 16 |
+
"hidden_size": 32,
|
| 17 |
+
"initializer_range": 0.02,
|
| 18 |
+
"interleave_moe_layer_step": 2,
|
| 19 |
+
"intermediate_size": 64,
|
| 20 |
+
"intermediate_size_mlp": 128,
|
| 21 |
+
"layer_types": [
|
| 22 |
+
"chunked_attention",
|
| 23 |
+
"chunked_attention",
|
| 24 |
+
"chunked_attention",
|
| 25 |
+
"full_attention"
|
| 26 |
+
],
|
| 27 |
+
"max_position_embeddings": 1048576,
|
| 28 |
+
"model_type": "llama4_text",
|
| 29 |
+
"moe_layers": [
|
| 30 |
+
1,
|
| 31 |
+
3
|
| 32 |
+
],
|
| 33 |
+
"neuron": {
|
| 34 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 35 |
+
"batch_size": 1,
|
| 36 |
+
"capacity_factor": null,
|
| 37 |
+
"checkpoint_id": "tiny-random/llama-4",
|
| 38 |
+
"checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4",
|
| 39 |
+
"continuous_batching": false,
|
| 40 |
+
"ep_degree": 1,
|
| 41 |
+
"fused_qkv": false,
|
| 42 |
+
"glu_mlp": true,
|
| 43 |
+
"local_ranks_size": 2,
|
| 44 |
+
"max_batch_size": 1,
|
| 45 |
+
"max_context_length": 1024,
|
| 46 |
+
"max_topk": 256,
|
| 47 |
+
"n_active_tokens": 1024,
|
| 48 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 49 |
+
"on_device_sampling": true,
|
| 50 |
+
"optimum_neuron_version": "0.4.4.dev1",
|
| 51 |
+
"output_logits": false,
|
| 52 |
+
"pp_degree": 1,
|
| 53 |
+
"sequence_length": 1024,
|
| 54 |
+
"sequence_parallel_enabled": false,
|
| 55 |
+
"speculation_length": 0,
|
| 56 |
+
"start_rank_id": 0,
|
| 57 |
+
"target": "trn1",
|
| 58 |
+
"torch_dtype": "bfloat16",
|
| 59 |
+
"tp_degree": 2
|
| 60 |
+
},
|
| 61 |
+
"no_rope_layers": [
|
| 62 |
+
1,
|
| 63 |
+
1,
|
| 64 |
+
1,
|
| 65 |
+
0
|
| 66 |
+
],
|
| 67 |
+
"num_attention_heads": 1,
|
| 68 |
+
"num_experts_per_tok": 1,
|
| 69 |
+
"num_hidden_layers": 4,
|
| 70 |
+
"num_key_value_heads": 1,
|
| 71 |
+
"num_local_experts": 8,
|
| 72 |
+
"output_router_logits": false,
|
| 73 |
+
"rms_norm_eps": 1e-05,
|
| 74 |
+
"rope_scaling": null,
|
| 75 |
+
"rope_theta": 500000.0,
|
| 76 |
+
"router_aux_loss_coef": 0.001,
|
| 77 |
+
"router_jitter_noise": 0.0,
|
| 78 |
+
"tie_word_embeddings": true,
|
| 79 |
+
"use_cache": true,
|
| 80 |
+
"use_qk_norm": true,
|
| 81 |
+
"vocab_size": 202048
|
| 82 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/mixtral/dacorvo/Mixtral-tiny/3779219bc59a2ae3e045.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MixtralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"dtype": "float16",
|
| 10 |
+
"head_dim": 32,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 1024,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 3584,
|
| 15 |
+
"max_position_embeddings": 1024,
|
| 16 |
+
"model_type": "mixtral",
|
| 17 |
+
"neuron": {
|
| 18 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 19 |
+
"batch_size": 1,
|
| 20 |
+
"capacity_factor": null,
|
| 21 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
| 22 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
| 23 |
+
"continuous_batching": false,
|
| 24 |
+
"ep_degree": 1,
|
| 25 |
+
"fused_qkv": false,
|
| 26 |
+
"glu_mlp": true,
|
| 27 |
+
"local_ranks_size": 2,
|
| 28 |
+
"max_batch_size": 1,
|
| 29 |
+
"max_context_length": 1024,
|
| 30 |
+
"max_topk": 256,
|
| 31 |
+
"n_active_tokens": 1024,
|
| 32 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 33 |
+
"on_device_sampling": false,
|
| 34 |
+
"optimum_neuron_version": "0.4.4.dev1",
|
| 35 |
+
"output_logits": false,
|
| 36 |
+
"pp_degree": 1,
|
| 37 |
+
"sequence_length": 1024,
|
| 38 |
+
"sequence_parallel_enabled": false,
|
| 39 |
+
"speculation_length": 0,
|
| 40 |
+
"start_rank_id": 0,
|
| 41 |
+
"target": "trn1",
|
| 42 |
+
"torch_dtype": "float16",
|
| 43 |
+
"tp_degree": 2
|
| 44 |
+
},
|
| 45 |
+
"num_attention_heads": 32,
|
| 46 |
+
"num_experts_per_tok": 2,
|
| 47 |
+
"num_hidden_layers": 2,
|
| 48 |
+
"num_key_value_heads": 8,
|
| 49 |
+
"num_local_experts": 8,
|
| 50 |
+
"output_router_logits": false,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_theta": 10000.0,
|
| 53 |
+
"router_aux_loss_coef": 0.001,
|
| 54 |
+
"router_jitter_noise": 0.0,
|
| 55 |
+
"sliding_window": 4096,
|
| 56 |
+
"tie_word_embeddings": false,
|
| 57 |
+
"use_cache": true,
|
| 58 |
+
"vocab_size": 32000
|
| 59 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/yujiepan/phi-4-tiny-random/2ebdbeae8a00090a231f.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Phi3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"auto_map": {},
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"embd_pdrop": 0.0,
|
| 13 |
+
"hidden_act": "silu",
|
| 14 |
+
"hidden_size": 16,
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 32,
|
| 17 |
+
"max_position_embeddings": 16384,
|
| 18 |
+
"model_type": "phi3",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
| 24 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 1024,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 1024,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.4.dev1",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 1024,
|
| 40 |
+
"sequence_parallel_enabled": false,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 2,
|
| 48 |
+
"num_hidden_layers": 2,
|
| 49 |
+
"num_key_value_heads": 1,
|
| 50 |
+
"original_max_position_embeddings": 16384,
|
| 51 |
+
"partial_rotary_factor": 1.0,
|
| 52 |
+
"resid_pdrop": 0.0,
|
| 53 |
+
"rms_norm_eps": 1e-05,
|
| 54 |
+
"rope_scaling": null,
|
| 55 |
+
"rope_theta": 250000,
|
| 56 |
+
"sliding_window": null,
|
| 57 |
+
"tie_word_embeddings": false,
|
| 58 |
+
"use_cache": true,
|
| 59 |
+
"vocab_size": 100352
|
| 60 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/e6f05187d051ac3c3e46.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/qwen2.5-128k-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen2ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"dtype": "bfloat16",
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 8,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 16,
|
| 14 |
+
"layer_types": [
|
| 15 |
+
"full_attention",
|
| 16 |
+
"full_attention"
|
| 17 |
+
],
|
| 18 |
+
"max_position_embeddings": 32768,
|
| 19 |
+
"max_window_layers": 1,
|
| 20 |
+
"model_type": "qwen2",
|
| 21 |
+
"neuron": {
|
| 22 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 23 |
+
"batch_size": 1,
|
| 24 |
+
"capacity_factor": null,
|
| 25 |
+
"checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
|
| 26 |
+
"checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"ep_degree": 1,
|
| 29 |
+
"fused_qkv": false,
|
| 30 |
+
"glu_mlp": true,
|
| 31 |
+
"local_ranks_size": 2,
|
| 32 |
+
"max_batch_size": 1,
|
| 33 |
+
"max_context_length": 1024,
|
| 34 |
+
"max_topk": 256,
|
| 35 |
+
"n_active_tokens": 1024,
|
| 36 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 37 |
+
"on_device_sampling": true,
|
| 38 |
+
"optimum_neuron_version": "0.4.4.dev1",
|
| 39 |
+
"output_logits": false,
|
| 40 |
+
"pp_degree": 1,
|
| 41 |
+
"sequence_length": 1024,
|
| 42 |
+
"sequence_parallel_enabled": false,
|
| 43 |
+
"speculation_length": 0,
|
| 44 |
+
"start_rank_id": 0,
|
| 45 |
+
"target": "trn1",
|
| 46 |
+
"torch_dtype": "bfloat16",
|
| 47 |
+
"tp_degree": 2
|
| 48 |
+
},
|
| 49 |
+
"num_attention_heads": 4,
|
| 50 |
+
"num_hidden_layers": 2,
|
| 51 |
+
"num_key_value_heads": 2,
|
| 52 |
+
"rms_norm_eps": 1e-06,
|
| 53 |
+
"rope_scaling": {
|
| 54 |
+
"factor": 4.0,
|
| 55 |
+
"original_max_position_embeddings": 32768,
|
| 56 |
+
"rope_type": "yarn",
|
| 57 |
+
"type": "yarn"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 1000000.0,
|
| 60 |
+
"sliding_window": null,
|
| 61 |
+
"tie_word_embeddings": false,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"use_sliding_window": false,
|
| 64 |
+
"vocab_size": 152064
|
| 65 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/18af48e9de9305ddddd5.json
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention"
|
| 45 |
+
],
|
| 46 |
+
"max_position_embeddings": 32768,
|
| 47 |
+
"max_window_layers": 28,
|
| 48 |
+
"model_type": "qwen3",
|
| 49 |
+
"neuron": {
|
| 50 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 51 |
+
"batch_size": 4,
|
| 52 |
+
"capacity_factor": null,
|
| 53 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 54 |
+
"checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
|
| 55 |
+
"continuous_batching": false,
|
| 56 |
+
"ep_degree": 1,
|
| 57 |
+
"fused_qkv": true,
|
| 58 |
+
"glu_mlp": true,
|
| 59 |
+
"local_ranks_size": 2,
|
| 60 |
+
"max_batch_size": 4,
|
| 61 |
+
"max_context_length": 1024,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"n_active_tokens": 1024,
|
| 64 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 65 |
+
"on_device_sampling": false,
|
| 66 |
+
"optimum_neuron_version": "0.4.4.dev1",
|
| 67 |
+
"output_logits": false,
|
| 68 |
+
"pp_degree": 1,
|
| 69 |
+
"sequence_length": 1024,
|
| 70 |
+
"sequence_parallel_enabled": false,
|
| 71 |
+
"speculation_length": 0,
|
| 72 |
+
"start_rank_id": 0,
|
| 73 |
+
"target": "trn1",
|
| 74 |
+
"torch_dtype": "bfloat16",
|
| 75 |
+
"tp_degree": 2
|
| 76 |
+
},
|
| 77 |
+
"num_attention_heads": 16,
|
| 78 |
+
"num_hidden_layers": 28,
|
| 79 |
+
"num_key_value_heads": 8,
|
| 80 |
+
"rms_norm_eps": 1e-06,
|
| 81 |
+
"rope_scaling": null,
|
| 82 |
+
"rope_theta": 1000000,
|
| 83 |
+
"sliding_window": null,
|
| 84 |
+
"tie_word_embeddings": true,
|
| 85 |
+
"use_cache": true,
|
| 86 |
+
"use_sliding_window": false,
|
| 87 |
+
"vocab_size": 151669
|
| 88 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/80594a5958040f8b1ebe.json
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention"
|
| 45 |
+
],
|
| 46 |
+
"max_position_embeddings": 32768,
|
| 47 |
+
"max_window_layers": 28,
|
| 48 |
+
"model_type": "qwen3",
|
| 49 |
+
"neuron": {
|
| 50 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 51 |
+
"batch_size": 6,
|
| 52 |
+
"capacity_factor": null,
|
| 53 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 54 |
+
"checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
|
| 55 |
+
"continuous_batching": false,
|
| 56 |
+
"ep_degree": 1,
|
| 57 |
+
"fused_qkv": true,
|
| 58 |
+
"glu_mlp": true,
|
| 59 |
+
"local_ranks_size": 2,
|
| 60 |
+
"max_batch_size": 6,
|
| 61 |
+
"max_context_length": 1024,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"n_active_tokens": 1024,
|
| 64 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 65 |
+
"on_device_sampling": false,
|
| 66 |
+
"optimum_neuron_version": "0.4.4.dev1",
|
| 67 |
+
"output_logits": false,
|
| 68 |
+
"pp_degree": 1,
|
| 69 |
+
"sequence_length": 1024,
|
| 70 |
+
"sequence_parallel_enabled": false,
|
| 71 |
+
"speculation_length": 0,
|
| 72 |
+
"start_rank_id": 0,
|
| 73 |
+
"target": "trn1",
|
| 74 |
+
"torch_dtype": "bfloat16",
|
| 75 |
+
"tp_degree": 2
|
| 76 |
+
},
|
| 77 |
+
"num_attention_heads": 16,
|
| 78 |
+
"num_hidden_layers": 28,
|
| 79 |
+
"num_key_value_heads": 8,
|
| 80 |
+
"rms_norm_eps": 1e-06,
|
| 81 |
+
"rope_scaling": null,
|
| 82 |
+
"rope_theta": 1000000,
|
| 83 |
+
"sliding_window": null,
|
| 84 |
+
"tie_word_embeddings": true,
|
| 85 |
+
"use_cache": true,
|
| 86 |
+
"use_sliding_window": false,
|
| 87 |
+
"vocab_size": 151669
|
| 88 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/a5cf4ee087cd9567449f.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3MoeForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"decoder_sparse_step": 2,
|
| 11 |
+
"dtype": "float32",
|
| 12 |
+
"head_dim": 32,
|
| 13 |
+
"hidden_act": "silu",
|
| 14 |
+
"hidden_size": 64,
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 128,
|
| 17 |
+
"max_position_embeddings": 40960,
|
| 18 |
+
"max_window_layers": 1,
|
| 19 |
+
"mlp_only_layers": [],
|
| 20 |
+
"model_type": "qwen3_moe",
|
| 21 |
+
"moe_intermediate_size": 128,
|
| 22 |
+
"neuron": {
|
| 23 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 24 |
+
"batch_size": 1,
|
| 25 |
+
"capacity_factor": null,
|
| 26 |
+
"checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
| 27 |
+
"checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
|
| 28 |
+
"continuous_batching": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"fused_qkv": false,
|
| 31 |
+
"glu_mlp": true,
|
| 32 |
+
"local_ranks_size": 2,
|
| 33 |
+
"max_batch_size": 1,
|
| 34 |
+
"max_context_length": 1024,
|
| 35 |
+
"max_topk": 256,
|
| 36 |
+
"n_active_tokens": 1024,
|
| 37 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 38 |
+
"on_device_sampling": true,
|
| 39 |
+
"optimum_neuron_version": "0.4.4.dev1",
|
| 40 |
+
"output_logits": false,
|
| 41 |
+
"pp_degree": 1,
|
| 42 |
+
"sequence_length": 1024,
|
| 43 |
+
"sequence_parallel_enabled": false,
|
| 44 |
+
"speculation_length": 0,
|
| 45 |
+
"start_rank_id": 0,
|
| 46 |
+
"target": "trn1",
|
| 47 |
+
"torch_dtype": "float32",
|
| 48 |
+
"tp_degree": 2
|
| 49 |
+
},
|
| 50 |
+
"norm_topk_prob": true,
|
| 51 |
+
"num_attention_heads": 2,
|
| 52 |
+
"num_experts": 8,
|
| 53 |
+
"num_experts_per_tok": 2,
|
| 54 |
+
"num_hidden_layers": 2,
|
| 55 |
+
"num_key_value_heads": 1,
|
| 56 |
+
"output_router_logits": false,
|
| 57 |
+
"rms_norm_eps": 1e-06,
|
| 58 |
+
"rope_scaling": null,
|
| 59 |
+
"rope_theta": 1000000.0,
|
| 60 |
+
"router_aux_loss_coef": 0.001,
|
| 61 |
+
"sliding_window": null,
|
| 62 |
+
"tie_word_embeddings": true,
|
| 63 |
+
"use_cache": true,
|
| 64 |
+
"use_sliding_window": false,
|
| 65 |
+
"vocab_size": 151936
|
| 66 |
+
}
|
neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 97794
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ef50e479badb0312e208336533316393fc797857eb5a2cde744753c266d7f3f
|
| 3 |
size 97794
|
neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 410624
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:671f2968ee09229cb96bb22be152df8bd197829d6ce7ad77ef854a3963b716b9
|
| 3 |
size 410624
|
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4aeec7d218e22beefd303645f302d7a805df96146ecddac686eb936f8d682209
|
| 3 |
+
size 84114
|
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f7732c3ad2ac92d6eca8c6efd83baeefa27be9061021e79a069e2496d6faa00
|
| 3 |
+
size 246784
|
neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9411f69d1d13a85dfcf64804e552e540bfc24470b7f2b52f419c5eceb8f6357
|
| 3 |
+
size 254967
|
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:948a51e7715bd8232b0ce82f8cca432d73c2a709e6a794d3a228525868c057f7
|
| 3 |
+
size 93425
|
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2c30438f983884de1b30e5866ed44b55b755f839caf513e2312cbdfefc8eda3
|
| 3 |
+
size 277504
|
neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbbb4dba625a46bae0587ff94ea66196c0ebfd2bfebd5b2d8680fa00fcf7292a
|
| 3 |
+
size 288898
|
neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc5d22607282b38e6ccc318f3d4d6f232ae382dbbf78a4b7ff65a37c4cdfbad8
|
| 3 |
+
size 82653
|
neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bb1c68e78267670445d5a05741b98754ca8194705a9ab1bdfb3ce7d44956568
|
| 3 |
+
size 216064
|
neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:780a4a6a0d57688dd4bc494548c72ecaebbfb862d6d3e880442cc279fd2d9437
|
| 3 |
+
size 224275
|
neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 82772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b56b0d331077542e72e5da19741dfcd8d5d6bdd01d35c1cdbe74227327956a6
|
| 3 |
size 82772
|
neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 267264
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1cd6ca36926fad065cd480b6e705f247170aeb533426465d808212c1b7a5123
|
| 3 |
size 267264
|
neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6d63d7e965d9319351949e66cbad9a3c799e1687254fc9e9b2a96c87adb32fe
|
| 3 |
+
size 865291
|
neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94b4f8ca1cd6dceeea9e1f3cab2cd6d38961323966034736ce0190d3011373fd
|
| 3 |
+
size 4967424
|
neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c379355b24f769c6ef1b9dae63ff12937987e7a12f79d81471854f83c88f565c
|
| 3 |
+
size 5133948
|
neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 694128
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5231390fb7b7b3c9416cee6d063cf72f57c3e66de2333a27302fd7480aad917a
|
| 3 |
size 694128
|
neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 625664
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d609c20314399dc90d7d3efc2f573991a8799ceb9c8c13a0528387d15454663
|
| 3 |
size 625664
|
neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:948a51e7715bd8232b0ce82f8cca432d73c2a709e6a794d3a228525868c057f7
|
| 3 |
+
size 93425
|
neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aae99c7c352a3bf0003fc6a1d3cf17bbbd7360829224f73be9a92a36fca32245
|
| 3 |
+
size 277504
|
neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02fc3c1e2ef02e67ce1ef2daf774a482f12c32bcea4cf8e8c91ce3505d1ee588
|
| 3 |
+
size 288898
|
neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "-O1", "--lnc=1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--internal-enable-dge-levels=vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1c364e560eed61f02284b32afbec9ee92735a7fc75f5da0fdebf4e6f62981de
|
| 3 |
+
size 91833
|
neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac0a5f8c623e69c32df0f65a514ae1f29f63c1e4629f5fd16842c7f8630970cf
|
| 3 |
+
size 369664
|
neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad55b6aa538df835c51877539b04aa7ec79969d2cfbbc39edc3ecc7feb5ee267
|
| 3 |
+
size 379225
|
neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04fa3d25bccfae87c411912a1318f7b09223d1a267d2024c09aef48a926aec16
|
| 3 |
+
size 85024
|
neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1060286be5259ba3f3a1e6e70edf309b3a5f9c2b9edfa4b7f049ca9554af484f
|
| 3 |
+
size 277504
|