dacorvo HF Staff commited on Mar 13

Commit

75f5ba3

verified ·

1 Parent(s): 93886a6

Synchronizing local compiler cache.

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +11 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4e1fa9712696dd29a0bc.json +95 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80514be9a02fddc9c476.json +95 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-Embedding-4B/80514be9a02fddc9c476.json +95 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/96baef0f2a01e5e29193.json +88 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/d73c51dc7dd75010abe9.json +63 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d182199b5d53c98ec562.json +64 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/24e7f0a205508b46b0eb.json +96 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6e2489df1bc1e5c5af63.json +96 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a34b748f9038b1e376dc.json +96 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/d73c51dc7dd75010abe9.json +63 -0
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-4B/24e7f0a205508b46b0eb.json +96 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/wrapped_neff.hlo +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_9068b179aef4ffa33207+6c043b9a/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_9068b179aef4ffa33207+6c043b9a/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/compile_flags.json +1 -0
neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.done +0 -0
neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.hlo_module.pb +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.neff +3 -0
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo +1 -1
neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/compile_flags.json +1 -0

.gitattributes CHANGED Viewed

@@ -7234,3 +7234,14 @@ neuronxcc-2.21.33363.0+82129205/MODULE_1662e89e76097dde374a+24129607/model.neff
 neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text

 neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_789b4718541dd2474859+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_ff54f59684fbb72ef7e9+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_bbee1996ee8025809e33+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4e1fa9712696dd29a0bc.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.6.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/80514be9a02fddc9c476.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.6.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-Embedding-4B/80514be9a02fddc9c476.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.6.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/96baef0f2a01e5e29193.json ADDED Viewed

	@@ -0,0 +1,88 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-0.6B",
+    "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 1,
+    "max_batch_size": 4,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.6.dev3",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 0,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 1
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/c54cbc73d9a74e547bf7ca1feb2b290b641ed261e32f7c08baba5633884f1298/d73c51dc7dd75010abe9.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.6.dev3",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 1024,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 128256
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d182199b5d53c98ec562.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 2,
+    "capacity_factor": null,
+    "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+    "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 2,
+    "max_context_length": 512,
+    "max_topk": 256,
+    "n_active_tokens": 512,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.6.dev3",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 0,
+    "sequence_length": 512,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 32.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": true,
+  "unsloth_fixed": true,
+  "use_cache": true,
+  "vocab_size": 128256
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/24e7f0a205508b46b0eb.json ADDED Viewed

	@@ -0,0 +1,96 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.6.dev3",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 0,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/6e2489df1bc1e5c5af63.json ADDED Viewed

	@@ -0,0 +1,96 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.6.dev3",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 1024,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/a34b748f9038b1e376dc.json ADDED Viewed

	@@ -0,0 +1,96 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 2048,
+    "max_topk": 256,
+    "n_active_tokens": 2048,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.6.dev3",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 1024,
+    "sequence_length": 2048,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/d73c51dc7dd75010abe9.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.6.dev3",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 1024,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "vocab_size": 128256
+}

neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-4B/24e7f0a205508b46b0eb.json ADDED Viewed

	@@ -0,0 +1,96 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.6.dev3",
+    "output_logits": false,
+    "pp_degree": 1,
+    "prefill_chunk_size": 0,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1418a3bc01d777bbefb8b367e96770bb02210c82476f56c4d78293cc1a7ea8bc
+size 848773

neuronxcc-2.21.33363.0+82129205/MODULE_0352823045bfbd34fd3d+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6bc69ce1b547d85de7a704e3830e155358ac4147366aab81cd690f7274405844
+size 4619264

neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c92837716166f7cb39c6297bc51bacaccdd8471f7a7cf4eb5a90aa139ca3f28b
+size 628810

neuronxcc-2.21.33363.0+82129205/MODULE_04dc6de9513dd8bb05c4+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e794bd527bf159e760f7e574be86936f0708fb508abd78488a3d208f0ae63803
+size 22672384

neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8223a0880f04437df80f89de1868b68be53988eb9a109c31fbf2423e0595ab55
+size 684041

neuronxcc-2.21.33363.0+82129205/MODULE_1759fced9dd9a678973e+24129607/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31b87a1ef2e12d7cf6675e391738773cafeccfb746d2312644814bef9e634af2
+size 7732224

neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1eab0e1e36ccc80e01b9fc5973caceec04c50f7f81d61ab27925c6145addeec
+size 412156

neuronxcc-2.21.33363.0+82129205/MODULE_1824b665798bf04e2aa2+24129607/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8cde39dbd1fb71e45804b6ac56f97815205e6a7d6a7deac0af9414593434681b
+size 2796544

neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d79d2330194768dee128be01cd0cbea2a63911f50e0c35b64ea4960ea48c493
+size 848917

neuronxcc-2.21.33363.0+82129205/MODULE_24a7a312e7cf7a6b6c5d+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e1571335377de3b6f476c55def0ba070f37fddb4a06253cf0f8b14da0e22e80
+size 11254784

neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b82959150728b621645262435120008706a4ef119a943f7db4a6d90aeb430c7
 size 728309

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d89321cf197c8909f26412855cae21850d77301d0ab013b344fb30575a4a7b5
 size 728309

neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ed190bc44698b591685d8b6feb0da87c5d1629abe304ab17569a3510a1a0fab
 size 7117824

 version https://git-lfs.github.com/spec/v1
+oid sha256:d7dff02b9d351d1d8c9461f01f9409fd21e2e118d101112e6e577cdc79664333
 size 7117824

neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e34e480fdf7a4270bca4077b8ded8f08218d466c960d9cd822ee3690dc8e334
 size 7264840

 version https://git-lfs.github.com/spec/v1
+oid sha256:f90ab0964874ee64d7464bea3df754dd92e3a9b03a300d19dc9db7063702dbb1
 size 7264840

neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a344a8bfb0bd70d150a25d375f0a7ad1432403ec9c9a9455db46debc4a8e84eb
+size 727217

neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:20e6a3e924fa9631c44d91b12bc5ea4e843680266ab0e765a4989e699c430fea
+size 3472384

neuronxcc-2.21.33363.0+82129205/MODULE_88833531cb6c582d4830+a02c3a36/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bdacf6d0dbac59c6eb12d348bb6273380fbbc85b63605b48e5bff0b6d951e062
+size 3610166

neuronxcc-2.21.33363.0+82129205/MODULE_9068b179aef4ffa33207+6c043b9a/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_9068b179aef4ffa33207+6c043b9a/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ffa25d516e37232d7dd03d3456190d81b046f1be6c8899eb4d314ef378edd56
+size 1171934

neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.done ADDED Viewed

File without changes

neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b61cf92085320a14cfb797578aa3fb39f5cba66690c0019cba79b75a2b244a32
+size 1165790

neuronxcc-2.21.33363.0+82129205/MODULE_92b96f10a3841c3c7919+6c043b9a/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5a5214416bbbf27c2666b320c779196f07ad6ebfd0ef705474d73bf5b4c3817
+size 49859584

neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12bcc80b2ad2cb1e12abbca61cd3f324f6f4f564cf78d121d0f5fcd01305c683
 size 451319

 version https://git-lfs.github.com/spec/v1
+oid sha256:b364b15f88cdba27d3ac4973f7dcfb2cbc49da9abd971bc1f0c52315364d55dc
 size 451319

neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdf04a70eb8bda2f7f4e49bedb2c346136c7e9b0dec32fa7421ea72474563fbe
 size 2509824

 version https://git-lfs.github.com/spec/v1
+oid sha256:f173e6c1f419b379767e720c82df3bec9c7942c49ca6ce29905fdea0ee240610
 size 2509824

neuronxcc-2.21.33363.0+82129205/MODULE_95b9072f246645f24461+a02c3a36/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c84f3a91584fb71a18f3ba16dd9c1cde9917ff28ab72e432ab03a61ad722f8dc
 size 2583911

 version https://git-lfs.github.com/spec/v1
+oid sha256:5cde9a5e1a781062038054f87a06889f4898ef6a5b5a50ae52638bd62e04342a
 size 2583911

neuronxcc-2.21.33363.0+82129205/MODULE_a431a415019174ce0469+6c043b9a/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_encoding/_tp0_bk0/log-neuron-cc.txt"]