dacorvo HF Staff commited on Feb 5

Commit

7b9dd22

verified ·

1 Parent(s): 66983f0

Synchronizing local compiler cache.

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +25 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/0dfe723550263376ebbf.json +87 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/468de8d6877b1b5efb87.json +87 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/c5697348fd90121b22ca.json +87 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/dbf0d3cf63a4b9c18134.json +87 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/e14467c71bee5cf6b4df.json +87 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/dbf0d3cf63a4b9c18134.json +87 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/1c4ee5d7dc71b8843fca.json +87 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/07552dc6c695df3ea557.json +95 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/1c4ee5d7dc71b8843fca.json +87 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-4B/07552dc6c695df3ea557.json +95 -0
neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/model.neff +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/wrapped_neff.hlo +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/model.log +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/wrapped_neff.hlo +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.hlo_module.pb +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.neff +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/wrapped_neff.hlo +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.hlo_module.pb +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.neff +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.hlo_module.pb +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.neff +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.done +0 -0

.gitattributes CHANGED Viewed

@@ -16108,3 +16108,28 @@ neuronxcc-2.21.33363.0+82129205/MODULE_efea8e54de072c4916ed+a32116a7/model.neff
 neuronxcc-2.21.33363.0+82129205/MODULE_119a20f83dc85c879ef7+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_119a20f83dc85c879ef7+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_1d1ee05517b3b870704a+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text

 neuronxcc-2.21.33363.0+82129205/MODULE_119a20f83dc85c879ef7+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_119a20f83dc85c879ef7+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_1d1ee05517b3b870704a+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/0dfe723550263376ebbf.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 4,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev1",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/468de8d6877b1b5efb87.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.5.dev1",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/c5697348fd90121b22ca.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 6,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 6,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev1",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/dbf0d3cf63a4b9c18134.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 6,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 6,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev1",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/e14467c71bee5cf6b4df.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 4,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev1",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/dbf0d3cf63a4b9c18134.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 6,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 6,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev1",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/1c4ee5d7dc71b8843fca.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 1,
+    "max_batch_size": 1,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 1
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/07552dc6c695df3ea557.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 1,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 1
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/1c4ee5d7dc71b8843fca.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 1,
+    "max_batch_size": 1,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 1
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-4B/07552dc6c695df3ea557.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 1,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 1
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9773e8a89bdb9d5fee0d87e79df3820ba15a88789076f3b4252aebfc5bc43deb
 size 3503104

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc81f037061f94e0f1fe3bc5bfffd4b5fdf4af0c261ea6c5614688d745efd2f4
 size 3503104

neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:535c1e201a5c612381965a264ba0ef64089144dd65205ec97f664971d65b498f
 size 3640161

 version https://git-lfs.github.com/spec/v1
+oid sha256:d92da39d5f156be691d57f5d60e9338aa9e80b18cc4d267ddd41a8d32e1768cc
 size 3640161

neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a7be97d2012aa41b5f56bf5bf86357a0dae77fd27821fa3e0737aba2fe5521e
 size 97794

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ef50e479badb0312e208336533316393fc797857eb5a2cde744753c266d7f3f
 size 97794

neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ff6c1f2a8f76328f3ffd5cd9cbb7ad62552ac54af96be0b68b754f3e79f2c6e
 size 410624

 version https://git-lfs.github.com/spec/v1
+oid sha256:671f2968ee09229cb96bb22be152df8bd197829d6ce7ad77ef854a3963b716b9
 size 410624

neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a60932659847098e53497491131db9516488a7e3c1c8ecc184ba9bbe4e3fb9a
+size 662166

neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4aa75cf62fd51441ac2a86aac870181fe1e178d38f055c50431a749169deef59
+size 18484224

neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b6460ecb77b0df76fab544cee5f5c477229fd70e65cd0377841933f5251ae36
+size 1134023

neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/model.log ADDED Viewed

	@@ -0,0 +1 @@

+ Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_0a5cbcb0f0d605182b40+24129607.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_0a5cbcb0f0d605182b40+24129607.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']:

neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2480c10b0a667a77bc9f969b614c1c1627c7c0b72a47251ae70598baa2b8ef8c
+size 1160172

neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2dd6a56a6d6d101f9dc7e21594bc27ca343f95faa1e79f666426a8bc49de066f
+size 5274624

neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1965465ca9aae5d3723f6c6e5eb6986498f071af8bc72caec566f9e9dab8a8e8
+size 5411681

neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ca0a9c8719d71e823180edfc6eba4a8842e74134383b6713cb51cd6134d5fb7
 size 596952

 version https://git-lfs.github.com/spec/v1
+oid sha256:c6361b23631f22b3534873d4bc3fde1c4197b34dc2d5d84b8097108cc7faed45
 size 596952

neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81f3a436a6014e92728a2af8e77d0d85eabeb5f40ccf61c11b4f0dcac0806751
 size 1936384

 version https://git-lfs.github.com/spec/v1
+oid sha256:0be47707e53a60759826fc077df760c768f27f3699574a1ca42a2c1721ac66bc
 size 1936384

neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3ec97e51a42515c9ce13acbfba6965d41f7e0554d97ca9f9ab94caebd61aaa5
 size 2092575

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e23e21a3d39349b0a3b066fd29eb841c6bb9dba1fde90c7a0792c5717638a18
 size 2092575

neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21b1204a4001c8a692290e9265478a1dbf162d41df0bba52b580fd22ab3a8735
 size 865460

 version https://git-lfs.github.com/spec/v1
+oid sha256:b77f0fd54de2b3f0e4d80ee388867e6ab1f396c5fd6c14be411f63d198649118
 size 865460

neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e245ced68ece76c2e986e8d3bd5daf8aa2cb73ddc6d8aa87dfdf32196593303
 size 36148224

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc4ee4b8b64d4dc6330fdd8fae2947adf4f88ba10b2505b8b51907e13863538d
 size 36148224

neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ccd8dd07c208eb87030d40b6fe53666dbd98b0ad6fa3cf5fa46bd1e5d023563f
+size 662166

neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:62694a86179db903be5f78ff966af1ec798da0d36a389aafbb60c4ae28079012
+size 27689984

neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c14b363d2172922ab0e3ce4c160951e7e0305a0d219a88349452f0fb6def3a75
+size 1305951

neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:995e8864ea6a0be8f610b9cb61e158a28ed3a39eeb96f2160c3f43f0287d7dc4
+size 98520064

neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0cfb81e51d1d924b03b36057b58cd756ac41d7388c73a9178ad22baccef42412
+size 1305951

neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9f7c100d4815aaffc226b9a25ef68cd5df5d7d3c333d97bfc35720818ad6bceb
+size 36250624

neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:329e621507127750dc610f95907a7e901219e2f34aaef4c17d140feef4a6c579
 size 1061093

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1c161937d0eeeb2eca0dfcca2535ee497d38d1e83b3dbc45f21ccf5a0100888
 size 1061093

neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91ba54a6131873512c8412ed7a9972adca35b2f18f36e528de0b048bc3c788de
 size 9473024

 version https://git-lfs.github.com/spec/v1
+oid sha256:af2c4a236a1f97d56e0840ca8099bf268c1fad8b15295cae8f906ae0c5534a0b
 size 9473024

neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4b6486a12378501e2484cf81b9f6885eabca76a7d817e3e2041c1736bac16d7
 size 82772

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b56b0d331077542e72e5da19741dfcd8d5d6bdd01d35c1cdbe74227327956a6
 size 82772

neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c65b6b3a5f7c34ae231694981cf76347fb89393c25990695cd4f921f3d243e3
 size 267264

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1cd6ca36926fad065cd480b6e705f247170aeb533426465d808212c1b7a5123
 size 267264

neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.done ADDED Viewed

File without changes