Synchronizing local compiler cache.
Browse files- .gitattributes +3 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/91f14718a400c0c5b075.json +63 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/91f14718a400c0c5b075.json +63 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/9df84ad1fbfa85b8d13d.json +64 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/9df84ad1fbfa85b8d13d.json +64 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.log +68 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.log +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo +1 -1
- neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.hlo_module.pb +1 -1
.gitattributes
CHANGED
|
@@ -7138,3 +7138,6 @@ neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.neff
|
|
| 7138 |
neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7139 |
neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7140 |
neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 7138 |
neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7139 |
neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7140 |
neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7141 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7142 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7143 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/91f14718a400c0c5b075.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 4096,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 14336,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 32,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 24 |
+
"checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
|
| 25 |
+
"continuous_batching": true,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 8,
|
| 30 |
+
"max_batch_size": 32,
|
| 31 |
+
"max_context_length": 4096,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 4096,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev2",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"prefill_chunk_size": 1024,
|
| 40 |
+
"sequence_length": 4096,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 8
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 32,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 8.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": false,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/91f14718a400c0c5b075.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 4096,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 14336,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 32,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 24 |
+
"checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
|
| 25 |
+
"continuous_batching": true,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 8,
|
| 30 |
+
"max_batch_size": 32,
|
| 31 |
+
"max_context_length": 4096,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 4096,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev2",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"prefill_chunk_size": 1024,
|
| 40 |
+
"sequence_length": 4096,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 8
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 32,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 8.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": false,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/9df84ad1fbfa85b8d13d.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 4,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": true,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 4,
|
| 31 |
+
"max_context_length": 1024,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 1024,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"prefill_chunk_size": 0,
|
| 40 |
+
"sequence_length": 1024,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/9df84ad1fbfa85b8d13d.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 4,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": true,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 4,
|
| 31 |
+
"max_context_length": 1024,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 1024,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"prefill_chunk_size": 0,
|
| 40 |
+
"sequence_length": 1024,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09e0a10b183a161e170b7493bd4a9dd182667f89f8ece1f2494608b23675002d
|
| 3 |
+
size 1138401
|
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0327a16b5dc1c49f216660acc51e727a2252d83407be1baa8614ef282c93eca4
|
| 3 |
+
size 19641344
|
neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69c5fd2d4dfd20c75dce3791e0fd8cc442c5400ddb65c5f47d56d6732f929c16
|
| 3 |
+
size 719476
|
neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0884d9ab358c84af50e93771b230fe5cdb3cc2da04eec969d86a7abd8123c345
|
| 3 |
+
size 997167
|
neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.log
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/model.MODULE_26c79ecf56e80170a14e+6170d8e1.hlo_module.pb', '--output', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/model.MODULE_26c79ecf56e80170a14e+6170d8e1.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [NLA001] Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
|
| 2 |
+
Process Process-1:
|
| 3 |
+
Traceback (most recent call last):
|
| 4 |
+
File "neuronxcc/driver/jobs/WalrusDriver.py", line 539, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.runWalrusDriver
|
| 5 |
+
File "neuronxcc/driver/Job.py", line 238, in neuronxcc.driver.Job.Job.shellCommand
|
| 6 |
+
subprocess.CalledProcessError: Command '['/home/ubuntu/optimum-neuron/.venv/lib/python3.10/site-packages/neuronxcc/starfish/bin/walrus_driver', '--optlevel', '2', '--allocator', 'coloring', '--verbose', '35', '--logfile-verbose', '20', '--logfile', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt', '--execute-repetition', '1', '-i', 'bir.json', '--min_split_size', '10240', '--skip_split_vns', '', '--no_split_dram', '--split_huge_dram_tensor', '1.0', '--preprocessing_only', '--max_tensorizer_distance', '64', '--pack_same_shape_only', '--instruction_fetch_latency', '511', '--max-partitions', '1', '--policy', '3', '--auxflag', '0', '--interleave', 'none', '--schedule-delayed-latency', '1', '--postsched-mm-accum-reorder=false', '--max-load-lower-bound', '0.14', '--force-prefetch-follow-incoming-order', '-1', '--allreduce-buffer-size', '500', '--dram-page-size', '512', '--dram-rotation-size', '-1', '--allreduce-rotation-dis', '8', '--repeat-load-thres', '4', '--enable-mm-transpose-remat-optimization=true', '--save-len-thres', '512', '--save-dma-cnt-thres', '32', '--print-format', 'json', '--relaxed-order=true', '--enable-anti-dependence-reduction=false', '--num-semaphores-per-queue', '16', '--numcores', '1', '--act-root-json', '/home/ubuntu/optimum-neuron/.venv/lib/python3.10/site-packages/neuronxcc/pwp/pwp_bin_trainium/act_info.json', '--dve-root-json', '/home/ubuntu/optimum-neuron/.venv/lib/python3.10/site-packages/neuronxcc/dve/dve_bin_gen2/dve_info.json', '--unified-backend-and-legacy-codegen', '--enable-verifier=true', '--enable-birsim=false', '--enable-birsim-sync-only=false', '--enable-data-race-checker=false', '--enable-new-backend=true', '--inject-error=NONE', '--dge-levels', 'scalar_dynamic_offset,io,vector_dynamic_offsets', '--dynamic-dma-scratch-size-per-partition=16384', '--neff-output-filename', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/model.MODULE_26c79ecf56e80170a14e+6170d8e1.neff']' returned non-zero exit status 1.
|
| 7 |
+
|
| 8 |
+
During handling of the above exception, another exception occurred:
|
| 9 |
+
|
| 10 |
+
Traceback (most recent call last):
|
| 11 |
+
File "neuronxcc/driver/commands/CompileCommand.py", line 1364, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline
|
| 12 |
+
File "neuronxcc/driver/Job.py", line 359, in neuronxcc.driver.Job.SingleInputJob.run
|
| 13 |
+
File "neuronxcc/driver/Job.py", line 385, in neuronxcc.driver.Job.SingleInputJob.runOnState
|
| 14 |
+
File "neuronxcc/driver/Pipeline.py", line 30, in neuronxcc.driver.Pipeline.Pipeline.runSingleInput
|
| 15 |
+
File "neuronxcc/driver/jobs/WalrusDriver.py", line 366, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.run
|
| 16 |
+
File "neuronxcc/driver/Job.py", line 359, in neuronxcc.driver.Job.SingleInputJob.run
|
| 17 |
+
File "neuronxcc/driver/Job.py", line 385, in neuronxcc.driver.Job.SingleInputJob.runOnState
|
| 18 |
+
File "neuronxcc/driver/jobs/WalrusDriver.py", line 991, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.runSingleInput
|
| 19 |
+
File "neuronxcc/driver/jobs/WalrusDriver.py", line 550, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.runWalrusDriver
|
| 20 |
+
neuronxcc.driver.Exceptions.CompilerInternalError: Non-signal exit. Backend exited with code 1 and stderr: [NLA001] Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
During handling of the above exception, another exception occurred:
|
| 24 |
+
|
| 25 |
+
Traceback (most recent call last):
|
| 26 |
+
File "neuronxcc/driver/ContextUtils.py", line 25, in neuronxcc.driver.ContextUtils.chdir.__exit__
|
| 27 |
+
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/nxd_model/chunked_prefill/_tp0_bk0'
|
| 28 |
+
|
| 29 |
+
During handling of the above exception, another exception occurred:
|
| 30 |
+
|
| 31 |
+
Traceback (most recent call last):
|
| 32 |
+
File "neuronxcc/driver/CommandDriver.py", line 339, in neuronxcc.driver.CommandDriver.CommandDriver.run_subcommand
|
| 33 |
+
File "neuronxcc/driver/commands/CompileCommand.py", line 1390, in neuronxcc.driver.commands.CompileCommand.CompileCommand.run
|
| 34 |
+
File "neuronxcc/driver/commands/CompileCommand.py", line 1341, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline
|
| 35 |
+
File "neuronxcc/driver/commands/CompileCommand.py", line 1373, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline
|
| 36 |
+
File "neuronxcc/driver/GlobalState.py", line 102, in neuronxcc.driver.GlobalState.FinalizeGlobalState
|
| 37 |
+
File "neuronxcc/driver/GlobalState.py", line 82, in neuronxcc.driver.GlobalState._GlobalStateImpl.shutdown
|
| 38 |
+
File "/usr/lib/python3.10/shutil.py", line 715, in rmtree
|
| 39 |
+
onerror(os.lstat, path, sys.exc_info())
|
| 40 |
+
File "/usr/lib/python3.10/shutil.py", line 713, in rmtree
|
| 41 |
+
orig_st = os.lstat(path)
|
| 42 |
+
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/nxd_model/chunked_prefill/_tp0_bk0/neuronxcc-f4mlvojj'
|
| 43 |
+
|
| 44 |
+
During handling of the above exception, another exception occurred:
|
| 45 |
+
|
| 46 |
+
Traceback (most recent call last):
|
| 47 |
+
File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
|
| 48 |
+
self.run()
|
| 49 |
+
File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
|
| 50 |
+
self._target(*self._args, **self._kwargs)
|
| 51 |
+
File "neuronxcc/driver/CommandDriver.py", line 346, in neuronxcc.driver.CommandDriver.CommandDriver.run_subcommand_in_process
|
| 52 |
+
File "neuronxcc/driver/CommandDriver.py", line 341, in neuronxcc.driver.CommandDriver.CommandDriver.run_subcommand
|
| 53 |
+
File "neuronxcc/driver/CommandDriver.py", line 125, in neuronxcc.driver.CommandDriver.handleError
|
| 54 |
+
File "/usr/lib/python3.10/logging/__init__.py", line 1506, in error
|
| 55 |
+
self._log(ERROR, msg, args, **kwargs)
|
| 56 |
+
File "/usr/lib/python3.10/logging/__init__.py", line 1624, in _log
|
| 57 |
+
self.handle(record)
|
| 58 |
+
File "/usr/lib/python3.10/logging/__init__.py", line 1634, in handle
|
| 59 |
+
self.callHandlers(record)
|
| 60 |
+
File "/usr/lib/python3.10/logging/__init__.py", line 1696, in callHandlers
|
| 61 |
+
hdlr.handle(record)
|
| 62 |
+
File "/usr/lib/python3.10/logging/__init__.py", line 968, in handle
|
| 63 |
+
self.emit(record)
|
| 64 |
+
File "/usr/lib/python3.10/logging/__init__.py", line 1216, in emit
|
| 65 |
+
self.stream = self._open()
|
| 66 |
+
File "/usr/lib/python3.10/logging/__init__.py", line 1201, in _open
|
| 67 |
+
return open_func(self.baseFilename, self.mode,
|
| 68 |
+
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt'
|
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d89321cf197c8909f26412855cae21850d77301d0ab013b344fb30575a4a7b5
|
| 3 |
+
size 728309
|
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7dff02b9d351d1d8c9461f01f9409fd21e2e118d101112e6e577cdc79664333
|
| 3 |
+
size 7117824
|
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f90ab0964874ee64d7464bea3df754dd92e3a9b03a300d19dc9db7063702dbb1
|
| 3 |
+
size 7264840
|
neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96e3d97e25d25fe5cd9f093170f55e9a283e671a0fc7b5fcc6c2a375210a05ce
|
| 3 |
+
size 923054
|
neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_89c37c678d033822c960+24129607.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_89c37c678d033822c960+24129607.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-03-10T15:03:56Z [XTP004] Number of instructions (6603944) is over the threshold (5000000). - Compile under --optlevel=1 to create smaller subgraphs or use pipeline parallelism.
|
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 451319
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b364b15f88cdba27d3ac4973f7dcfb2cbc49da9abd971bc1f0c52315364d55dc
|
| 3 |
size 451319
|
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2509824
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f173e6c1f419b379767e720c82df3bec9c7942c49ca6ce29905fdea0ee240610
|
| 3 |
size 2509824
|
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2583911
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cde9a5e1a781062038054f87a06889f4898ef6a5b5a50ae52638bd62e04342a
|
| 3 |
size 2583911
|
neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 588724
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19abd02589b7e770cc7d22a41832a1d7c1b18e5db1cdbd0f57e95939951b2311
|
| 3 |
size 588724
|
neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1659904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e9a16db24435f3344d68819315ac43e7ec035fdf41c68ba517491e1cc7db394
|
| 3 |
size 1659904
|
neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1782293
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee0ea35ca5a7f2dda442d215bb13c572d23bc55b448a956b0f57522e111c3d11
|
| 3 |
size 1782293
|
neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1538064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d62b0089ca06aace851336de76d8714df772fc1fd25f2fb6cccca122c1411c0
|
| 3 |
size 1538064
|