dacorvo HF Staff commited on
Commit
7022925
·
verified ·
1 Parent(s): 70ffed2

Synchronizing local compiler cache.

Browse files
Files changed (29) hide show
  1. .gitattributes +3 -0
  2. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/91f14718a400c0c5b075.json +63 -0
  3. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/91f14718a400c0c5b075.json +63 -0
  4. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/9df84ad1fbfa85b8d13d.json +64 -0
  5. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/9df84ad1fbfa85b8d13d.json +64 -0
  6. neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/compile_flags.json +1 -0
  7. neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.done +0 -0
  8. neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.hlo_module.pb +3 -0
  9. neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.neff +3 -0
  10. neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json +1 -0
  11. neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb +3 -0
  12. neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/compile_flags.json +1 -0
  13. neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.hlo_module.pb +3 -0
  14. neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.log +68 -0
  15. neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json +1 -0
  16. neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.done +0 -0
  17. neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb +3 -0
  18. neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff +3 -0
  19. neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo +3 -0
  20. neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/compile_flags.json +1 -0
  21. neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.hlo_module.pb +3 -0
  22. neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.log +1 -0
  23. neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb +1 -1
  24. neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff +1 -1
  25. neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo +1 -1
  26. neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb +1 -1
  27. neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff +1 -1
  28. neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo +1 -1
  29. neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.hlo_module.pb +1 -1
.gitattributes CHANGED
@@ -7138,3 +7138,6 @@ neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.neff
7138
  neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
7139
  neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
7140
  neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
7138
  neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
7139
  neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
7140
  neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7141
+ neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text
7142
+ neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
7143
+ neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/91f14718a400c0c5b075.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Llama-3.1-8B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 4096,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 14336,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 32,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
24
+ "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
25
+ "continuous_batching": true,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 8,
30
+ "max_batch_size": 32,
31
+ "max_context_length": 4096,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 4096,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev2",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "prefill_chunk_size": 1024,
40
+ "sequence_length": 4096,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn1",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 8
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 32,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 8.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": false,
61
+ "use_cache": true,
62
+ "vocab_size": 128256
63
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/91f14718a400c0c5b075.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Llama-3.1-8B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 4096,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 14336,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 32,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
24
+ "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
25
+ "continuous_batching": true,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 8,
30
+ "max_batch_size": 32,
31
+ "max_context_length": 4096,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 4096,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev2",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "prefill_chunk_size": 1024,
40
+ "sequence_length": 4096,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn1",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 8
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 32,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 8.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": false,
61
+ "use_cache": true,
62
+ "vocab_size": 128256
63
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/9df84ad1fbfa85b8d13d.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 4,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
25
+ "continuous_batching": true,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 4,
31
+ "max_context_length": 1024,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 1024,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev3",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "prefill_chunk_size": 0,
40
+ "sequence_length": 1024,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn1",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 32.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": true,
61
+ "unsloth_fixed": true,
62
+ "use_cache": true,
63
+ "vocab_size": 128256
64
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/9df84ad1fbfa85b8d13d.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 4,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
25
+ "continuous_batching": true,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 4,
31
+ "max_context_length": 1024,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 1024,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev3",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "prefill_chunk_size": 0,
40
+ "sequence_length": 1024,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn1",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 32.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": true,
61
+ "unsloth_fixed": true,
62
+ "use_cache": true,
63
+ "vocab_size": 128256
64
+ }
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e0a10b183a161e170b7493bd4a9dd182667f89f8ece1f2494608b23675002d
3
+ size 1138401
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0327a16b5dc1c49f216660acc51e727a2252d83407be1baa8614ef282c93eca4
3
+ size 19641344
neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69c5fd2d4dfd20c75dce3791e0fd8cc442c5400ddb65c5f47d56d6732f929c16
3
+ size 719476
neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0884d9ab358c84af50e93771b230fe5cdb3cc2da04eec969d86a7abd8123c345
3
+ size 997167
neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.log ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/model.MODULE_26c79ecf56e80170a14e+6170d8e1.hlo_module.pb', '--output', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/model.MODULE_26c79ecf56e80170a14e+6170d8e1.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [NLA001] Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
2
+ Process Process-1:
3
+ Traceback (most recent call last):
4
+ File "neuronxcc/driver/jobs/WalrusDriver.py", line 539, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.runWalrusDriver
5
+ File "neuronxcc/driver/Job.py", line 238, in neuronxcc.driver.Job.Job.shellCommand
6
+ subprocess.CalledProcessError: Command '['/home/ubuntu/optimum-neuron/.venv/lib/python3.10/site-packages/neuronxcc/starfish/bin/walrus_driver', '--optlevel', '2', '--allocator', 'coloring', '--verbose', '35', '--logfile-verbose', '20', '--logfile', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt', '--execute-repetition', '1', '-i', 'bir.json', '--min_split_size', '10240', '--skip_split_vns', '', '--no_split_dram', '--split_huge_dram_tensor', '1.0', '--preprocessing_only', '--max_tensorizer_distance', '64', '--pack_same_shape_only', '--instruction_fetch_latency', '511', '--max-partitions', '1', '--policy', '3', '--auxflag', '0', '--interleave', 'none', '--schedule-delayed-latency', '1', '--postsched-mm-accum-reorder=false', '--max-load-lower-bound', '0.14', '--force-prefetch-follow-incoming-order', '-1', '--allreduce-buffer-size', '500', '--dram-page-size', '512', '--dram-rotation-size', '-1', '--allreduce-rotation-dis', '8', '--repeat-load-thres', '4', '--enable-mm-transpose-remat-optimization=true', '--save-len-thres', '512', '--save-dma-cnt-thres', '32', '--print-format', 'json', '--relaxed-order=true', '--enable-anti-dependence-reduction=false', '--num-semaphores-per-queue', '16', '--numcores', '1', '--act-root-json', '/home/ubuntu/optimum-neuron/.venv/lib/python3.10/site-packages/neuronxcc/pwp/pwp_bin_trainium/act_info.json', '--dve-root-json', '/home/ubuntu/optimum-neuron/.venv/lib/python3.10/site-packages/neuronxcc/dve/dve_bin_gen2/dve_info.json', '--unified-backend-and-legacy-codegen', '--enable-verifier=true', '--enable-birsim=false', '--enable-birsim-sync-only=false', '--enable-data-race-checker=false', '--enable-new-backend=true', '--inject-error=NONE', '--dge-levels', 'scalar_dynamic_offset,io,vector_dynamic_offsets', '--dynamic-dma-scratch-size-per-partition=16384', '--neff-output-filename', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/model.MODULE_26c79ecf56e80170a14e+6170d8e1.neff']' returned non-zero exit status 1.
7
+
8
+ During handling of the above exception, another exception occurred:
9
+
10
+ Traceback (most recent call last):
11
+ File "neuronxcc/driver/commands/CompileCommand.py", line 1364, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline
12
+ File "neuronxcc/driver/Job.py", line 359, in neuronxcc.driver.Job.SingleInputJob.run
13
+ File "neuronxcc/driver/Job.py", line 385, in neuronxcc.driver.Job.SingleInputJob.runOnState
14
+ File "neuronxcc/driver/Pipeline.py", line 30, in neuronxcc.driver.Pipeline.Pipeline.runSingleInput
15
+ File "neuronxcc/driver/jobs/WalrusDriver.py", line 366, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.run
16
+ File "neuronxcc/driver/Job.py", line 359, in neuronxcc.driver.Job.SingleInputJob.run
17
+ File "neuronxcc/driver/Job.py", line 385, in neuronxcc.driver.Job.SingleInputJob.runOnState
18
+ File "neuronxcc/driver/jobs/WalrusDriver.py", line 991, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.runSingleInput
19
+ File "neuronxcc/driver/jobs/WalrusDriver.py", line 550, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.runWalrusDriver
20
+ neuronxcc.driver.Exceptions.CompilerInternalError: Non-signal exit. Backend exited with code 1 and stderr: [NLA001] Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
21
+
22
+
23
+ During handling of the above exception, another exception occurred:
24
+
25
+ Traceback (most recent call last):
26
+ File "neuronxcc/driver/ContextUtils.py", line 25, in neuronxcc.driver.ContextUtils.chdir.__exit__
27
+ FileNotFoundError: [Errno 2] No such file or directory: '/tmp/nxd_model/chunked_prefill/_tp0_bk0'
28
+
29
+ During handling of the above exception, another exception occurred:
30
+
31
+ Traceback (most recent call last):
32
+ File "neuronxcc/driver/CommandDriver.py", line 339, in neuronxcc.driver.CommandDriver.CommandDriver.run_subcommand
33
+ File "neuronxcc/driver/commands/CompileCommand.py", line 1390, in neuronxcc.driver.commands.CompileCommand.CompileCommand.run
34
+ File "neuronxcc/driver/commands/CompileCommand.py", line 1341, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline
35
+ File "neuronxcc/driver/commands/CompileCommand.py", line 1373, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline
36
+ File "neuronxcc/driver/GlobalState.py", line 102, in neuronxcc.driver.GlobalState.FinalizeGlobalState
37
+ File "neuronxcc/driver/GlobalState.py", line 82, in neuronxcc.driver.GlobalState._GlobalStateImpl.shutdown
38
+ File "/usr/lib/python3.10/shutil.py", line 715, in rmtree
39
+ onerror(os.lstat, path, sys.exc_info())
40
+ File "/usr/lib/python3.10/shutil.py", line 713, in rmtree
41
+ orig_st = os.lstat(path)
42
+ FileNotFoundError: [Errno 2] No such file or directory: '/tmp/nxd_model/chunked_prefill/_tp0_bk0/neuronxcc-f4mlvojj'
43
+
44
+ During handling of the above exception, another exception occurred:
45
+
46
+ Traceback (most recent call last):
47
+ File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
48
+ self.run()
49
+ File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
50
+ self._target(*self._args, **self._kwargs)
51
+ File "neuronxcc/driver/CommandDriver.py", line 346, in neuronxcc.driver.CommandDriver.CommandDriver.run_subcommand_in_process
52
+ File "neuronxcc/driver/CommandDriver.py", line 341, in neuronxcc.driver.CommandDriver.CommandDriver.run_subcommand
53
+ File "neuronxcc/driver/CommandDriver.py", line 125, in neuronxcc.driver.CommandDriver.handleError
54
+ File "/usr/lib/python3.10/logging/__init__.py", line 1506, in error
55
+ self._log(ERROR, msg, args, **kwargs)
56
+ File "/usr/lib/python3.10/logging/__init__.py", line 1624, in _log
57
+ self.handle(record)
58
+ File "/usr/lib/python3.10/logging/__init__.py", line 1634, in handle
59
+ self.callHandlers(record)
60
+ File "/usr/lib/python3.10/logging/__init__.py", line 1696, in callHandlers
61
+ hdlr.handle(record)
62
+ File "/usr/lib/python3.10/logging/__init__.py", line 968, in handle
63
+ self.emit(record)
64
+ File "/usr/lib/python3.10/logging/__init__.py", line 1216, in emit
65
+ self.stream = self._open()
66
+ File "/usr/lib/python3.10/logging/__init__.py", line 1201, in _open
67
+ return open_func(self.baseFilename, self.mode,
68
+ FileNotFoundError: [Errno 2] No such file or directory: '/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt'
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d89321cf197c8909f26412855cae21850d77301d0ab013b344fb30575a4a7b5
3
+ size 728309
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7dff02b9d351d1d8c9461f01f9409fd21e2e118d101112e6e577cdc79664333
3
+ size 7117824
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f90ab0964874ee64d7464bea3df754dd92e3a9b03a300d19dc9db7063702dbb1
3
+ size 7264840
neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96e3d97e25d25fe5cd9f093170f55e9a283e671a0fc7b5fcc6c2a375210a05ce
3
+ size 923054
neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.log ADDED
@@ -0,0 +1 @@
 
 
1
+ Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_89c37c678d033822c960+24129607.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_89c37c678d033822c960+24129607.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-03-10T15:03:56Z [XTP004] Number of instructions (6603944) is over the threshold (5000000). - Compile under --optlevel=1 to create smaller subgraphs or use pipeline parallelism.
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12bcc80b2ad2cb1e12abbca61cd3f324f6f4f564cf78d121d0f5fcd01305c683
3
  size 451319
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b364b15f88cdba27d3ac4973f7dcfb2cbc49da9abd971bc1f0c52315364d55dc
3
  size 451319
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdf04a70eb8bda2f7f4e49bedb2c346136c7e9b0dec32fa7421ea72474563fbe
3
  size 2509824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f173e6c1f419b379767e720c82df3bec9c7942c49ca6ce29905fdea0ee240610
3
  size 2509824
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c84f3a91584fb71a18f3ba16dd9c1cde9917ff28ab72e432ab03a61ad722f8dc
3
  size 2583911
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cde9a5e1a781062038054f87a06889f4898ef6a5b5a50ae52638bd62e04342a
3
  size 2583911
neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:726697ae6d69106a35fc6caab871c35bccbf129873b887f75986cfbdd687e625
3
  size 588724
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19abd02589b7e770cc7d22a41832a1d7c1b18e5db1cdbd0f57e95939951b2311
3
  size 588724
neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7c258ba5a15e6c435b0719ac65ac7aed62119f5d3d3dc497bc8557452b5afc7
3
  size 1659904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e9a16db24435f3344d68819315ac43e7ec035fdf41c68ba517491e1cc7db394
3
  size 1659904
neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd30f302fc03f3e2cbf90294931da3ae0f5faa5a5f0cf021340988ef20175088
3
  size 1782293
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee0ea35ca5a7f2dda442d215bb13c572d23bc55b448a956b0f57522e111c3d11
3
  size 1782293
neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83abf1bb517dbb8632d7feb8ba420485545377443e5ea7695fc1a16ef7dad001
3
  size 1538064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d62b0089ca06aace851336de76d8714df772fc1fd25f2fb6cccca122c1411c0
3
  size 1538064