dacorvo HF Staff commited on 21 days ago

Commit

7022925

verified ·

1 Parent(s): 70ffed2

Synchronizing local compiler cache.

Browse files

Files changed (29) hide show

.gitattributes +3 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/91f14718a400c0c5b075.json +63 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/91f14718a400c0c5b075.json +63 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/9df84ad1fbfa85b8d13d.json +64 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/9df84ad1fbfa85b8d13d.json +64 -0
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.log +68 -0
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.log +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.hlo_module.pb +1 -1

.gitattributes CHANGED Viewed

@@ -7138,3 +7138,6 @@ neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.neff
 neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text

 neuronxcc-2.21.33363.0+82129205/MODULE_18b928d4e4846c6ce9c7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_50933980a498d1588ed1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/91f14718a400c0c5b075.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-3.1-8B-Instruct",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.6.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 1024,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 128256
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/meta-llama/Llama-3.1-8B-Instruct/91f14718a400c0c5b075.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-3.1-8B-Instruct",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.6.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 1024,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 128256
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/9df84ad1fbfa85b8d13d.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+    "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 4,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.6.dev3",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 0,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 32.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": true,
+  "unsloth_fixed": true,
+  "use_cache": true,
+  "vocab_size": 128256
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/9df84ad1fbfa85b8d13d.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+    "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 4,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.6.dev3",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 0,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 32.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": true,
+  "unsloth_fixed": true,
+  "use_cache": true,
+  "vocab_size": 128256
+}

neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09e0a10b183a161e170b7493bd4a9dd182667f89f8ece1f2494608b23675002d
+size 1138401

neuronxcc-2.21.33363.0+82129205/MODULE_120ca30bc72b9b137b57+6170d8e1/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0327a16b5dc1c49f216660acc51e727a2252d83407be1baa8614ef282c93eca4
+size 19641344

neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:69c5fd2d4dfd20c75dce3791e0fd8cc442c5400ddb65c5f47d56d6732f929c16
+size 719476

neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0884d9ab358c84af50e93771b230fe5cdb3cc2da04eec969d86a7abd8123c345
+size 997167

neuronxcc-2.21.33363.0+82129205/MODULE_26c79ecf56e80170a14e+6170d8e1/model.log ADDED Viewed

	@@ -0,0 +1,68 @@

+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/model.MODULE_26c79ecf56e80170a14e+6170d8e1.hlo_module.pb', '--output', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/model.MODULE_26c79ecf56e80170a14e+6170d8e1.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [NLA001]  Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+Process Process-1:
+Traceback (most recent call last):
+  File "neuronxcc/driver/jobs/WalrusDriver.py", line 539, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.runWalrusDriver
+  File "neuronxcc/driver/Job.py", line 238, in neuronxcc.driver.Job.Job.shellCommand
+subprocess.CalledProcessError: Command '['/home/ubuntu/optimum-neuron/.venv/lib/python3.10/site-packages/neuronxcc/starfish/bin/walrus_driver', '--optlevel', '2', '--allocator', 'coloring', '--verbose', '35', '--logfile-verbose', '20', '--logfile', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt', '--execute-repetition', '1', '-i', 'bir.json', '--min_split_size', '10240', '--skip_split_vns', '', '--no_split_dram', '--split_huge_dram_tensor', '1.0', '--preprocessing_only', '--max_tensorizer_distance', '64', '--pack_same_shape_only', '--instruction_fetch_latency', '511', '--max-partitions', '1', '--policy', '3', '--auxflag', '0', '--interleave', 'none', '--schedule-delayed-latency', '1', '--postsched-mm-accum-reorder=false', '--max-load-lower-bound', '0.14', '--force-prefetch-follow-incoming-order', '-1', '--allreduce-buffer-size', '500', '--dram-page-size', '512', '--dram-rotation-size', '-1', '--allreduce-rotation-dis', '8', '--repeat-load-thres', '4', '--enable-mm-transpose-remat-optimization=true', '--save-len-thres', '512', '--save-dma-cnt-thres', '32', '--print-format', 'json', '--relaxed-order=true', '--enable-anti-dependence-reduction=false', '--num-semaphores-per-queue', '16', '--numcores', '1', '--act-root-json', '/home/ubuntu/optimum-neuron/.venv/lib/python3.10/site-packages/neuronxcc/pwp/pwp_bin_trainium/act_info.json', '--dve-root-json', '/home/ubuntu/optimum-neuron/.venv/lib/python3.10/site-packages/neuronxcc/dve/dve_bin_gen2/dve_info.json', '--unified-backend-and-legacy-codegen', '--enable-verifier=true', '--enable-birsim=false', '--enable-birsim-sync-only=false', '--enable-data-race-checker=false', '--enable-new-backend=true', '--inject-error=NONE', '--dge-levels', 'scalar_dynamic_offset,io,vector_dynamic_offsets', '--dynamic-dma-scratch-size-per-partition=16384', '--neff-output-filename', '/tmp/nxd_model/chunked_prefill/_tp0_bk0/model.MODULE_26c79ecf56e80170a14e+6170d8e1.neff']' returned non-zero exit status 1.
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "neuronxcc/driver/commands/CompileCommand.py", line 1364, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline
+  File "neuronxcc/driver/Job.py", line 359, in neuronxcc.driver.Job.SingleInputJob.run
+  File "neuronxcc/driver/Job.py", line 385, in neuronxcc.driver.Job.SingleInputJob.runOnState
+  File "neuronxcc/driver/Pipeline.py", line 30, in neuronxcc.driver.Pipeline.Pipeline.runSingleInput
+  File "neuronxcc/driver/jobs/WalrusDriver.py", line 366, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.run
+  File "neuronxcc/driver/Job.py", line 359, in neuronxcc.driver.Job.SingleInputJob.run
+  File "neuronxcc/driver/Job.py", line 385, in neuronxcc.driver.Job.SingleInputJob.runOnState
+  File "neuronxcc/driver/jobs/WalrusDriver.py", line 991, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.runSingleInput
+  File "neuronxcc/driver/jobs/WalrusDriver.py", line 550, in neuronxcc.driver.jobs.WalrusDriver.WalrusDriver.runWalrusDriver
+neuronxcc.driver.Exceptions.CompilerInternalError: Non-signal exit. Backend exited with code 1 and stderr: [NLA001]  Unhandled exception with message: [json.exception.parse_error.101] parse error at line 1, column 1: attempting to parse an empty input; check that your input string or stream contains the expected JSON - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "neuronxcc/driver/ContextUtils.py", line 25, in neuronxcc.driver.ContextUtils.chdir.__exit__
+FileNotFoundError: [Errno 2] No such file or directory: '/tmp/nxd_model/chunked_prefill/_tp0_bk0'
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "neuronxcc/driver/CommandDriver.py", line 339, in neuronxcc.driver.CommandDriver.CommandDriver.run_subcommand
+  File "neuronxcc/driver/commands/CompileCommand.py", line 1390, in neuronxcc.driver.commands.CompileCommand.CompileCommand.run
+  File "neuronxcc/driver/commands/CompileCommand.py", line 1341, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline
+  File "neuronxcc/driver/commands/CompileCommand.py", line 1373, in neuronxcc.driver.commands.CompileCommand.CompileCommand.runPipeline
+  File "neuronxcc/driver/GlobalState.py", line 102, in neuronxcc.driver.GlobalState.FinalizeGlobalState
+  File "neuronxcc/driver/GlobalState.py", line 82, in neuronxcc.driver.GlobalState._GlobalStateImpl.shutdown
+  File "/usr/lib/python3.10/shutil.py", line 715, in rmtree
+    onerror(os.lstat, path, sys.exc_info())
+  File "/usr/lib/python3.10/shutil.py", line 713, in rmtree
+    orig_st = os.lstat(path)
+FileNotFoundError: [Errno 2] No such file or directory: '/tmp/nxd_model/chunked_prefill/_tp0_bk0/neuronxcc-f4mlvojj'
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "neuronxcc/driver/CommandDriver.py", line 346, in neuronxcc.driver.CommandDriver.CommandDriver.run_subcommand_in_process
+  File "neuronxcc/driver/CommandDriver.py", line 341, in neuronxcc.driver.CommandDriver.CommandDriver.run_subcommand
+  File "neuronxcc/driver/CommandDriver.py", line 125, in neuronxcc.driver.CommandDriver.handleError
+  File "/usr/lib/python3.10/logging/__init__.py", line 1506, in error
+    self._log(ERROR, msg, args, **kwargs)
+  File "/usr/lib/python3.10/logging/__init__.py", line 1624, in _log
+    self.handle(record)
+  File "/usr/lib/python3.10/logging/__init__.py", line 1634, in handle
+    self.callHandlers(record)
+  File "/usr/lib/python3.10/logging/__init__.py", line 1696, in callHandlers
+    hdlr.handle(record)
+  File "/usr/lib/python3.10/logging/__init__.py", line 968, in handle
+    self.emit(record)
+  File "/usr/lib/python3.10/logging/__init__.py", line 1216, in emit
+    self.stream = self._open()
+  File "/usr/lib/python3.10/logging/__init__.py", line 1201, in _open
+    return open_func(self.baseFilename, self.mode,
+FileNotFoundError: [Errno 2] No such file or directory: '/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt'

neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d89321cf197c8909f26412855cae21850d77301d0ab013b344fb30575a4a7b5
+size 728309

neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d7dff02b9d351d1d8c9461f01f9409fd21e2e118d101112e6e577cdc79664333
+size 7117824

neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f90ab0964874ee64d7464bea3df754dd92e3a9b03a300d19dc9db7063702dbb1
+size 7264840

neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:96e3d97e25d25fe5cd9f093170f55e9a283e671a0fc7b5fcc6c2a375210a05ce
+size 923054

neuronxcc-2.21.33363.0+82129205/MODULE_89c37c678d033822c960+24129607/model.log ADDED Viewed

	@@ -0,0 +1 @@

+ Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_89c37c678d033822c960+24129607.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_89c37c678d033822c960+24129607.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-03-10T15:03:56Z [XTP004] Number of instructions (6603944) is over the threshold (5000000). - Compile under --optlevel=1 to create smaller subgraphs or use pipeline parallelism.

neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12bcc80b2ad2cb1e12abbca61cd3f324f6f4f564cf78d121d0f5fcd01305c683
 size 451319

 version https://git-lfs.github.com/spec/v1
+oid sha256:b364b15f88cdba27d3ac4973f7dcfb2cbc49da9abd971bc1f0c52315364d55dc
 size 451319

neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdf04a70eb8bda2f7f4e49bedb2c346136c7e9b0dec32fa7421ea72474563fbe
 size 2509824

 version https://git-lfs.github.com/spec/v1
+oid sha256:f173e6c1f419b379767e720c82df3bec9c7942c49ca6ce29905fdea0ee240610
 size 2509824

neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c84f3a91584fb71a18f3ba16dd9c1cde9917ff28ab72e432ab03a61ad722f8dc
 size 2583911

 version https://git-lfs.github.com/spec/v1
+oid sha256:5cde9a5e1a781062038054f87a06889f4898ef6a5b5a50ae52638bd62e04342a
 size 2583911

neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:726697ae6d69106a35fc6caab871c35bccbf129873b887f75986cfbdd687e625
 size 588724

 version https://git-lfs.github.com/spec/v1
+oid sha256:19abd02589b7e770cc7d22a41832a1d7c1b18e5db1cdbd0f57e95939951b2311
 size 588724

neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7c258ba5a15e6c435b0719ac65ac7aed62119f5d3d3dc497bc8557452b5afc7
 size 1659904

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e9a16db24435f3344d68819315ac43e7ec035fdf41c68ba517491e1cc7db394
 size 1659904

neuronxcc-2.21.33363.0+82129205/MODULE_c69743a3b026cb8c0f9e+a02c3a36/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd30f302fc03f3e2cbf90294931da3ae0f5faa5a5f0cf021340988ef20175088
 size 1782293

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee0ea35ca5a7f2dda442d215bb13c572d23bc55b448a956b0f57522e111c3d11
 size 1782293

neuronxcc-2.21.33363.0+82129205/MODULE_da7543b73fbd9c77b695+6170d8e1/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83abf1bb517dbb8632d7feb8ba420485545377443e5ea7695fc1a16ef7dad001
 size 1538064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d62b0089ca06aace851336de76d8714df772fc1fd25f2fb6cccca122c1411c0
 size 1538064