diff --git a/.gitattributes b/.gitattributes index 1c68ea7d82c7d88aa800cad93c259ca226c02cdb..08864dac7f1b60b116b88d9f5c6aa505c20dd638 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16108,3 +16108,28 @@ neuronxcc-2.21.33363.0+82129205/MODULE_efea8e54de072c4916ed+a32116a7/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_119a20f83dc85c879ef7+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_119a20f83dc85c879ef7+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_1d1ee05517b3b870704a+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/0dfe723550263376ebbf.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/0dfe723550263376ebbf.json new file mode 100644 index 0000000000000000000000000000000000000000..bf002c05bcd6ac4a957108b48bd97ca3fc801f59 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/0dfe723550263376ebbf.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/468de8d6877b1b5efb87.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/468de8d6877b1b5efb87.json new file mode 100644 index 0000000000000000000000000000000000000000..445059804a05a07dfb488df15fbf34a959d7b9ca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/468de8d6877b1b5efb87.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/c5697348fd90121b22ca.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/c5697348fd90121b22ca.json new file mode 100644 index 0000000000000000000000000000000000000000..0c8186e12e459321bda1d75513636d329890d920 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/c5697348fd90121b22ca.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/dbf0d3cf63a4b9c18134.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/dbf0d3cf63a4b9c18134.json new file mode 100644 index 0000000000000000000000000000000000000000..051219fb57497809172e32ac8810537ed9d94113 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/dbf0d3cf63a4b9c18134.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/e14467c71bee5cf6b4df.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/e14467c71bee5cf6b4df.json new file mode 100644 index 0000000000000000000000000000000000000000..a4c0d88f8ba1263f48b813fcb4dae6cf10620639 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/e14467c71bee5cf6b4df.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/dbf0d3cf63a4b9c18134.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/dbf0d3cf63a4b9c18134.json new file mode 100644 index 0000000000000000000000000000000000000000..051219fb57497809172e32ac8810537ed9d94113 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/dbf0d3cf63a4b9c18134.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/1c4ee5d7dc71b8843fca.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/1c4ee5d7dc71b8843fca.json new file mode 100644 index 0000000000000000000000000000000000000000..f9d2e5a4913a6235391b59b3be0cb4b5320c05ca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/1c4ee5d7dc71b8843fca.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/07552dc6c695df3ea557.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/07552dc6c695df3ea557.json new file mode 100644 index 0000000000000000000000000000000000000000..908b2c13d2c33a7f8b5ade7c62948a0201d2f00c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/07552dc6c695df3ea557.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/1c4ee5d7dc71b8843fca.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/1c4ee5d7dc71b8843fca.json new file mode 100644 index 0000000000000000000000000000000000000000..f9d2e5a4913a6235391b59b3be0cb4b5320c05ca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/1c4ee5d7dc71b8843fca.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-4B/07552dc6c695df3ea557.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-4B/07552dc6c695df3ea557.json new file mode 100644 index 0000000000000000000000000000000000000000..908b2c13d2c33a7f8b5ade7c62948a0201d2f00c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-4B/07552dc6c695df3ea557.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-4B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-4B", + "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/model.neff index afd2e68e547aa0c3af5b930867d260a3ba430d94..4178c8c1826d302992afaf5394242c4c5b2beb54 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9773e8a89bdb9d5fee0d87e79df3820ba15a88789076f3b4252aebfc5bc43deb +oid sha256:dc81f037061f94e0f1fe3bc5bfffd4b5fdf4af0c261ea6c5614688d745efd2f4 size 3503104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/wrapped_neff.hlo index 93c8b8d840e2f01bc08c0a5f16e189363db37c95..c30f8d19a04b245c503eee8f9a340962bef62afe 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_003d3d9afcec48e81ba4+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:535c1e201a5c612381965a264ba0ef64089144dd65205ec97f664971d65b498f +oid sha256:d92da39d5f156be691d57f5d60e9338aa9e80b18cc4d267ddd41a8d32e1768cc size 3640161 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb index 910fb21ca78c9b5103637fe5ba28e6ecd2162ea9..e1c7a90ca8e1b18d6f623143b55987c66f3a0976 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a7be97d2012aa41b5f56bf5bf86357a0dae77fd27821fa3e0737aba2fe5521e +oid sha256:8ef50e479badb0312e208336533316393fc797857eb5a2cde744753c266d7f3f size 97794 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff index 887c66d40f263958cddbd9115ba918fa59003dbf..e4bfe59d6155f39fb2dc7b02432b14d2efc62266 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ff6c1f2a8f76328f3ffd5cd9cbb7ad62552ac54af96be0b68b754f3e79f2c6e +oid sha256:671f2968ee09229cb96bb22be152df8bd197829d6ce7ad77ef854a3963b716b9 size 410624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..85a56f02b45bf8a9d7ae1ea1bb53774329b608a5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a60932659847098e53497491131db9516488a7e3c1c8ecc184ba9bbe4e3fb9a +size 662166 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bf8860e5999eaffe0a770749558d6bf102a2d8ff --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_065f10c75a3a003852db+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa75cf62fd51441ac2a86aac870181fe1e178d38f055c50431a749169deef59 +size 18484224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..20442449b038e0a9265ab8943607040352a62cb1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b6460ecb77b0df76fab544cee5f5c477229fd70e65cd0377841933f5251ae36 +size 1134023 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/model.log new file mode 100644 index 0000000000000000000000000000000000000000..548ce27c52af33b64e960ab7259d88c934b95caa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0a5cbcb0f0d605182b40+24129607/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_0a5cbcb0f0d605182b40+24129607.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_0a5cbcb0f0d605182b40+24129607.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3b72c53abfbd9c2dc55e7b494fddfac84c3d2b74 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2480c10b0a667a77bc9f969b614c1c1627c7c0b72a47251ae70598baa2b8ef8c +size 1160172 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..de14fd126b5180a978dc1ee2145a76b24aa7e78c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd6a56a6d6d101f9dc7e21594bc27ca343f95faa1e79f666426a8bc49de066f +size 5274624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1829e6659e64db9aaba8cea65dc4c49eeec3587d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8f2cac7949cfdcbe7c+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1965465ca9aae5d3723f6c6e5eb6986498f071af8bc72caec566f9e9dab8a8e8 +size 5411681 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.hlo_module.pb index 2b843c21a9eca44a50823d9d159c8f60a770c4e3..7bce5095c949934184123fb96719ea53a2057667 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ca0a9c8719d71e823180edfc6eba4a8842e74134383b6713cb51cd6134d5fb7 +oid sha256:c6361b23631f22b3534873d4bc3fde1c4197b34dc2d5d84b8097108cc7faed45 size 596952 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.neff index 0552ae495415e2fcb5a2d07605f66ac6f209b512..d9d3bb858681b0723bda25f49abcb33571303d5d 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81f3a436a6014e92728a2af8e77d0d85eabeb5f40ccf61c11b4f0dcac0806751 +oid sha256:0be47707e53a60759826fc077df760c768f27f3699574a1ca42a2c1721ac66bc size 1936384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/wrapped_neff.hlo index 38ac5374b75e241384b3ba0683ac74cddf771e5f..b83454042532437969142d6d7309a8a4feca0510 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1934c2fc93e7793f0aef+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3ec97e51a42515c9ce13acbfba6965d41f7e0554d97ca9f9ab94caebd61aaa5 +oid sha256:9e23e21a3d39349b0a3b066fd29eb841c6bb9dba1fde90c7a0792c5717638a18 size 2092575 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.hlo_module.pb index 2f9624c1eefa19fab1b31fc6f0b11744b23cfe6a..57b324cfac621f09e5b76754f3064d2bd3d88a50 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21b1204a4001c8a692290e9265478a1dbf162d41df0bba52b580fd22ab3a8735 +oid sha256:b77f0fd54de2b3f0e4d80ee388867e6ab1f396c5fd6c14be411f63d198649118 size 865460 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.neff index 0a56f967b5ef928fce83c9cedd5f39e67b48abf7..5bc62ccff49928f839b0e38f0104812d45543b92 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1944f672afeaa6480c61+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e245ced68ece76c2e986e8d3bd5daf8aa2cb73ddc6d8aa87dfdf32196593303 +oid sha256:bc4ee4b8b64d4dc6330fdd8fae2947adf4f88ba10b2505b8b51907e13863538d size 36148224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ab6d1e215c42b3381bf5d7f8a3b4d6e1ffcc9dda --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccd8dd07c208eb87030d40b6fe53666dbd98b0ad6fa3cf5fa46bd1e5d023563f +size 662166 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ff6f73d04cc1887ab685e718ab4a6ecf332d2486 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1aace3454acab4662925+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62694a86179db903be5f78ff966af1ec798da0d36a389aafbb60c4ae28079012 +size 27689984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ec852f27ee987374474e108b8cb12afb4caa93a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c14b363d2172922ab0e3ce4c160951e7e0305a0d219a88349452f0fb6def3a75 +size 1305951 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2d7160601595dd3eefc35559f0c95202fd45c3b3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1cc0ec46877f3aa47ae6+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:995e8864ea6a0be8f610b9cb61e158a28ed3a39eeb96f2160c3f43f0287d7dc4 +size 98520064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d1be892d4540b713ec0bc98fdaf658ba15a27ddb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cfb81e51d1d924b03b36057b58cd756ac41d7388c73a9178ad22baccef42412 +size 1305951 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ee8e0dab9e57173fe3786e78de8406d329a69947 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_21fc076779532e6e1e41+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7c100d4815aaffc226b9a25ef68cd5df5d7d3c333d97bfc35720818ad6bceb +size 36250624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.hlo_module.pb index ca9bae0de2f377eb55116ac5ffac7016164602ea..2454aac239b8ec369102b0184785d212d54d5a33 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:329e621507127750dc610f95907a7e901219e2f34aaef4c17d140feef4a6c579 +oid sha256:c1c161937d0eeeb2eca0dfcca2535ee497d38d1e83b3dbc45f21ccf5a0100888 size 1061093 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.neff index 3e1feb5f78b53c3696f5f0308c66aa2185433a9e..20ddb57879805fbb4d393d858b69286efb35950a 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_250ecf58b3a2e7ac5757+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91ba54a6131873512c8412ed7a9972adca35b2f18f36e528de0b048bc3c788de +oid sha256:af2c4a236a1f97d56e0840ca8099bf268c1fad8b15295cae8f906ae0c5534a0b size 9473024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb index a151e0db66c9936e4eb4c88ec4b2d7c748e10e2d..d396e291c54b3b1e1c83a9761d827ea5c2b5a5a3 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b4b6486a12378501e2484cf81b9f6885eabca76a7d817e3e2041c1736bac16d7 +oid sha256:6b56b0d331077542e72e5da19741dfcd8d5d6bdd01d35c1cdbe74227327956a6 size 82772 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff index cb31650fc005c2de4ee977a846b33ff3a5fb7cb3..24f815ffa5bd57a7edac132c9d55d934ee370402 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c65b6b3a5f7c34ae231694981cf76347fb89393c25990695cd4f921f3d243e3 +oid sha256:d1cd6ca36926fad065cd480b6e705f247170aeb533426465d808212c1b7a5123 size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0926c4540cc4c51b4f6be6bd81d2143b5bacffd1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:785aa90f0c02edea7fa7bfa3f027911590216e6ba5d1fcfda24f035fcda9a26b +size 970945 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9855356b990f9e60cb8c8cb600056e798d74cc15 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ed2d7356322fb4775653475be3f414ca0bc2bca5fcda90c348c7c666f05f65 +size 2417664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..27242a68660f00fdba23b6dc471be54058db7e11 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_27dcb29f0a28b1d6f51e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a763fab4b07f6395b05cc4d75afd8b08056ad5dad787b3befaccf11460dd31c +size 2554955 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fd0200c89032660ad01908166c47c54be46ec27f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4bfd87783375e038bf25e0e2d8810154384e5f8e2e73640820a95ef156e16d6 +size 793180 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bb85cc03f88d364116802323ce4c5f38f87b57f5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:121a4a45c22c23eaa0eb9bc94dcd4f162557e2c0574c2f9eefb1d7bcd069895b +size 242791424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.hlo_module.pb index d052929de82acd1187f858e74ed2076c39543f57..966c1aaad545da211cfa74e20afa9dc61b2a3d94 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cdf2caa53226440029bfe2eff23ccba4cbbc28b423da3ef47c8dd7787f1adc2 +oid sha256:9bacb2f2f540eaff839d412bc9e371b6354cf8f97831c89ac14fcad4da672c55 size 661362 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.neff index 437b1e2b0834f58cfce595c7cc85051e1f1a2566..0d236132b203efa0292be95377b2a1813256e9ee 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6f8474b6176cc9f7006a4850c4520077e0e9baf6c7c3b800df0ac89236cd334 +oid sha256:b096fd09961ede9fc78f22a88f39e2023b064d98e4ecf8cae48c55a4e5c7b80e size 18473984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2fa826853951fded06a7+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2fa826853951fded06a7+24129607/model.neff index 67917b93844dd4bb0c32d25c0cfef3bbbb70a341..60c10426c58b573d2829043e4801708ae93a17e9 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_2fa826853951fded06a7+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2fa826853951fded06a7+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73bc57670880a1d8c14b09b534496a5bb7cc1f46c31e5ce981f37b619dcd2a5d +oid sha256:1a4c05c79c0e8b54b3c2cad96b4dc4555973c79789563b4a43e5dce546268ae2 size 36148224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_313c4e4e44b74c64a01a+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_313c4e4e44b74c64a01a+a02c3a36/model.neff index f180a3ac59042bcbaf0a9707591c35c3c9adc5cd..4f809cd67362761109ea5e7b0fd40f064284f87d 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_313c4e4e44b74c64a01a+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_313c4e4e44b74c64a01a+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c9ce6e5dbe86ee5e3ee3af585888bf707419b3429fbbe6715735f2e750e7186 +oid sha256:e5477d813971bbd860779e46013216672c82d9307c9018cdd6df9a2cbdc45dbf size 3503104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_313c4e4e44b74c64a01a+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_313c4e4e44b74c64a01a+a02c3a36/wrapped_neff.hlo index 4c48fe14bbfd7f045878e6b908c13c843acbdeb3..0703f5fb9fc7eedd8cb588bd5d8f693b8777e87c 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_313c4e4e44b74c64a01a+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_313c4e4e44b74c64a01a+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18bbd8164abb30f44a7b3893042bab4ff76c686ef675f41461bcc30dd730315b +oid sha256:f4231b25c02fede81b37a9b22d913b0989eaf5281d447b2a9f30304db4c3527d size 3640161 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/model.hlo_module.pb index eaa293b4f71a668dfd800b85378e442f16ff772e..781ef29d24dabc23eb054215bc19b2abc3f419bb 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43084b28c1eb1e4e35a8ce5107944cf673a708ff6fdcca63d8eb5db3610ddc9e +oid sha256:6f113c10c77741cc08a19adc7d5e09e035a03a03d751dbefede3d51d70aa3a37 size 426769 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/model.neff index 3c1d4bb93e5fca421abf6d91fc90f3e8d828bef3..8b5e9b5929c041f51bc1955afc24d21acd9eecbb 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8b2b09210453b06bb5291f939a26f54542cf10e5825b38365c7048649a22586 +oid sha256:6f6b941c15ed66c686b8acff2b452ead98bcf0a038a2569fb7a79463ea49703a size 3073024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/wrapped_neff.hlo index 69a4ec3d18d5f7201c52266637ff1b0c20707812..ddaba73d698a3c624e122b4d2fcbf3d56b483c5f 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3c75648f1b6d85bd72d5+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bf07a3c100d60a4846c9553cebcf878350a91cdc83b9e21f41497b5e29a9353 +oid sha256:3251669900d2348ad53608811a4ffc8d5adb276028f5b97a24b9dcb52014473d size 3147125 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_42601fbe748660a77be0+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_42601fbe748660a77be0+24129607/model.hlo_module.pb index d6ec8f50958e8d2e4ce15f18cddcde3d6e39eaba..8e197c567018eaadae5985e26111641ed208c2fb 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_42601fbe748660a77be0+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_42601fbe748660a77be0+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64694dab15395d5f9cbbf32f45ee8924868bc1c2e8052c947c86c68c533b0420 +oid sha256:d007bb7f19dc61262140698229d888d0b068426fac10d9b9b91caf164ce03f62 size 923430 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_42601fbe748660a77be0+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_42601fbe748660a77be0+24129607/model.neff index f4a243064cdf6402784f67094405d26f78708214..02757a845d768b8e2a635157bcfbb1e8fcbbe156 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_42601fbe748660a77be0+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_42601fbe748660a77be0+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1605afbb7b8dfa13b0d1ab9a10c86b98ac20a19d12cd861502b0974964a9627f +oid sha256:4afe796b0d005955e0e3c76c735b50c83ce7b5c0ca9c0cd5b76b031131b4f6e4 size 5213184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4ac5ed244908e0b23eb7+677eeb9d/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4ac5ed244908e0b23eb7+677eeb9d/model.hlo_module.pb index 58b844b6061c1f9a1ab0b3c6c889c594ba91dd4b..b4c40605da7851ffe567c9778de9d2083d306e1a 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_4ac5ed244908e0b23eb7+677eeb9d/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4ac5ed244908e0b23eb7+677eeb9d/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca80d55257caff0cc88a182611da0df7d95d1903c9dfe7bf87cf178ffb39718d +oid sha256:185054b8ba80e7f52847989d5a58e3fb4aade61a671dfa510960053999998743 size 423848 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4ac5ed244908e0b23eb7+677eeb9d/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4ac5ed244908e0b23eb7+677eeb9d/model.neff index 623cc7349e31b11ca390261219fa1e4057305709..6c6159f8d47db8fd8279f29a598f88cf5f4cba96 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_4ac5ed244908e0b23eb7+677eeb9d/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4ac5ed244908e0b23eb7+677eeb9d/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d70ad414aa3573103cbfde1ce85a53ec1a0481420517552d1a3c956e83bb630b +oid sha256:c9edfbb33bf7c99e688e8c548d7dd44eb866e3c89386ccf98942ae4694ebfeba size 3687424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..963b7503f0325335e403b763a6c257f90002777d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380e8daab3755d24ca283194dee7bf62115d60c453a0dcc4ca2559cfaef5aeeb +size 1151980 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e744f169142eeac2d15afbaa30dab34e9265a2dc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21cca7984347d42a5555a33b8dffa6db01358974ccdcbace582bafbd3b126ed2 +size 3625984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e88baa94db7e813cc5e06f616feda30a79ce902c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4cf1e5f6e4c17bb56bb0+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d25fcc3e40dbdbcff2461d85f0290309279af24fab756e76d7f2d5f48a80b986 +size 3763041 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4ec35bb57fbe1fdb6c78+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec35bb57fbe1fdb6c78+24129607/model.hlo_module.pb index 9704ecf1639b344805be684c2ded122cea54d55c..895edb3462afd732c644c9f7bdcba520ae1d2f4f 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_4ec35bb57fbe1fdb6c78+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec35bb57fbe1fdb6c78+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dffab56f351fd6047a44f5ebd5da77b5b9ff225cbccc10f1838035d418f90930 +oid sha256:9e6541330eaced8ef5bd62804c6e06991d2bcb38f68c71b2cabb4ce571665de2 size 1050679 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4ec35bb57fbe1fdb6c78+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec35bb57fbe1fdb6c78+24129607/model.neff index 99bdbdb85c437f2560d064e1104bb4c119c3726a..4706e6ef2fc9d02aea23b7d7dd1b7f62c0776276 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_4ec35bb57fbe1fdb6c78+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4ec35bb57fbe1fdb6c78+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50bc7c0ea9062790faf1ff7831453d65c8d1d51cde620c8e380820e4486a5f9d +oid sha256:d387ff6d3dd9954294cec3810fadabd0718735a038aaebd74ab358c209fd2b88 size 5827584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..003c3f9732c535070e63dfe2d47cde73a68b4119 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa17af5c8272f5870d89784464e1f6463bb2af7d487a9ce20b02d6a1125f74c7 +size 669001 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d6d300f040911304b9d9cfd4f388838dbde5725f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78acd71a7779fc6a4c0ff2a5d54c020bc3ab88c747ba9ff05aa12a5590d00a2e +size 24955904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.hlo_module.pb index 96b0b9d0aac667bdfb4b89f452b77fd0695d787e..dbe1f9441e2a7b2c4185bbafced0748b740f4d63 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76c10df9f9fadac3ce5fee74c4470b2cdade440cf97718b4545a4e3de7fc54aa +oid sha256:5231390fb7b7b3c9416cee6d063cf72f57c3e66de2333a27302fd7480aad917a size 694128 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff index ea50519fb23dc2dea11f1fddc04774675adc6431..b4b9c29f0411998756002266e85b7d95ef151b1d 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f816d25661f62f7f838c0800cb426bb2a2a552c974bc9c93181f83780ef36d8 +oid sha256:1d609c20314399dc90d7d3efc2f573991a8799ceb9c8c13a0528387d15454663 size 625664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/model.hlo_module.pb index 3b4e1caf8515f00e3ac37c3a1dfdba8ecbc8165c..ca2139b0a1c89d82bfa0225cb7b5731633c2c9f3 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19511d8804f5867708960e2d2fe363464f9fab523e00714e2bb658fd0dcf7d66 +oid sha256:e684a3c433b3d1931b42c868bc1f8e187d8d344041c428cfd571ae78019cda22 size 872065 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/model.neff index 32d9b98722be808e147b384df7ffd9d0acfed0ef..85e79360d6e09dcd11d7ee7195d717c34e7e6256 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1f800118fbab2a3f0e76ea675155dee4ac7c7c81344422aaf3a1d9410fa8c8c +oid sha256:558d00184fa1bac07430de16a73ee90da6291d87bdca5c37a0861521f936e86f size 6759424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/wrapped_neff.hlo index ae0aac5addc49da5e5dc1eff828aca17a6d3900b..ac67cf1b1f6b08f88b88277daf86dc0d968f2f51 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5a1775d2dd85719fa52a+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e19063f7564465530256f4c433085a999fd5c7f620fd46742c22e971a9c2a098 +oid sha256:f3f72238ea26ef23b1555bbfebd0581b0e7192da33c47dabc912a8987fe7a106 size 6925948 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_740c88e5b7c7bd2f9bae+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_740c88e5b7c7bd2f9bae+a02c3a36/model.neff index c82043ade41edd2245f21b0612779e7cf36225d8..1d19ec58ea30fbc60460e92a65f667d9ff589ff4 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_740c88e5b7c7bd2f9bae+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_740c88e5b7c7bd2f9bae+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0d00a067a62b1f1860deb777f444b9cb08cb6f877ec885847d64ea73763b0efc +oid sha256:8f7a47d761c50e5ce0c9150e15d0e9f32726afb1e95a6072553758b37a853663 size 3073024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_740c88e5b7c7bd2f9bae+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_740c88e5b7c7bd2f9bae+a02c3a36/wrapped_neff.hlo index 7ea34a25aa5497e5bb5b6816f001ddd754ed0800..2ec9b817684a199180fefb951640f29396b46be0 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_740c88e5b7c7bd2f9bae+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_740c88e5b7c7bd2f9bae+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26442807d050083846013c76054618f0984ad4569d6bc245666e9114a5c0a2cb +oid sha256:c366c2aa9312137e4985670cf006f130a78f6f383ce45c8c327a270a772b4bf8 size 3147125 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78a467fd5a1d9f7adeef+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_78a467fd5a1d9f7adeef+24129607/model.hlo_module.pb index 8efac31d5864de2a0cbd583784f247093eaa7a94..b5354b7b5a65a43ef1260aa0e8de13980762d080 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_78a467fd5a1d9f7adeef+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78a467fd5a1d9f7adeef+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d98bb4abcef46bc4dfcb539ece861af724c555279e78c4ba6ef27c16f25cecc +oid sha256:649ff83e0cbee13abda84e02c1a114e8b60d780456161e42f68c99c57744691c size 739558 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78a467fd5a1d9f7adeef+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_78a467fd5a1d9f7adeef+24129607/model.neff index b8556e1e2744f691770610f5d28bb67d270daeb5..7ecd1ce442179565edc8bb4d1660d21aff8162f3 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_78a467fd5a1d9f7adeef+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78a467fd5a1d9f7adeef+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01d131442ec0615f97346844e44a2b093f093f77430729c4c6a75c4ec1267984 +oid sha256:40dfa607b020ef9e891d5a4dca71cd75f67a5ecc7a86e72b2146a17479837b20 size 26133504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9a0c51641f001bc37180ac1c66545e91b64dca82 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c22a67788c464a4c8d78838646a5854fad916f6fcb96004c989a7eb5af67c0ed +size 1305951 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fd8291d94efebded3cdd0d5863485a540a580bea --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_79eb3abf001c934aa416+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f3514df74437731caf4111758f70df12219b13b0cad80b4a75dda72ab95a1b3 +size 98520064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.hlo_module.pb index abd8b71c1a7c59ba9f9f685af7f4e1b45a51fe57..ee3c8bec8796a02f9da35400a1ebb43b69b8d3cc 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f68e091382df6b2a367f3477146b8d9c0a52add2f649439a665ea4015a6d45b +oid sha256:f7174f1b2be736cf81d0d1787eaa09341057fe414cbbc02d6c9b12a31d6d1402 size 661362 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.neff index 166116a8ce7b39ddb314c768310d49e25a0d465d..a00d63e822d6c03474c6bbda72034d9e3286eafd 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1250e91de34fcad28ca625e64918d074cb9bec25c6593ba9a988d35b448c1cfd +oid sha256:9e31980eef984a5dca7919bec01552bcd34f96c43a1391d0fb74dc4a5a650955 size 27689984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9826c0fef852bf790f19+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9826c0fef852bf790f19+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9826c0fef852bf790f19+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9826c0fef852bf790f19+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9826c0fef852bf790f19+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..23955c8aa04f815ede8d03ac78b87c5e3e85ec7a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9826c0fef852bf790f19+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2714c67b1ba11d32ae54c9fc937f170e94ac320fac329665daf9757735019b5f +size 830704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9826c0fef852bf790f19+a02c3a36/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_9826c0fef852bf790f19+a02c3a36/model.log new file mode 100644 index 0000000000000000000000000000000000000000..8fee8d8f7cbb0e3912a35c3e22ba1b883d0ff498 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9826c0fef852bf790f19+a02c3a36/model.log @@ -0,0 +1,2 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/token_generation/_tp0_bk0/model.MODULE_9826c0fef852bf790f19+a02c3a36.hlo_module.pb', '--output', '/tmp/nxd_model/token_generation/_tp0_bk0/model.MODULE_9826c0fef852bf790f19+a02c3a36.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt', '--enable-internal-neff-wrapper', '--verbose=35']: 2026-01-23T14:56:28Z 2026-01-23 14:56:28.139327: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF009] Memory requirement exceeds target architecture's HBM limit. Needed 20355489816 bytes (18 GB) vs. available 17179869184 bytes (16 GB). TIP: Consider using smaller batches or applying model parallelism + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..83d4c0e610c489035ee34fd59a6f9e8974da5728 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5306874f906d5fb3a8c3738c52e12279328ef60273dfc8eb2fc05e58a6bfb3d3 +size 669069 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..34db0f534d031425e556d86471667c2091afed27 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de41b268c848143a516fc6a93a1aba30c9c219d1f5530fc8e87ff3ee7a9e9563 +size 25037824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9bc06d024b9ce47e5f56+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9bc06d024b9ce47e5f56+24129607/model.hlo_module.pb index 78b5cc8665d05cb8f7c7db976481945ecb1bb354..2dfe28bd941cdcb1aa0cd8e62b7727c1b2ce8f58 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_9bc06d024b9ce47e5f56+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9bc06d024b9ce47e5f56+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18debfa97c09c988925a1aaaa8b9c0f8f4cd2b025cbeb08d7ef3bfc4ec9a865f +oid sha256:dfa7ee587c6dd7eb719c810d0b3390042fe6e1ba6c8c33b36d4f10a723820f13 size 509380 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9bc06d024b9ce47e5f56+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9bc06d024b9ce47e5f56+24129607/model.neff index c410473e6bd09ffec233ea46ca33324c0cddd158..9120cd17e6add4b59847b4c481e64723b78e789d 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_9bc06d024b9ce47e5f56+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9bc06d024b9ce47e5f56+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:693b83d4f6771c87b8387a60f6b448121dd1c389524ed32f8e1e427f5e1aaacd +oid sha256:6fcc51a8a2631ef64a3fc55d99615f28707984ec5d38a3df36fa25006a98fe9a size 41585664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c53a5ac284b649e893b+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9c53a5ac284b649e893b+a02c3a36/model.neff index 6dfc3b39a64ae6384a22e8750bf1a363f6cca3e8..97ce758dae65295731f1803d803ded3f316ad263 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_9c53a5ac284b649e893b+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c53a5ac284b649e893b+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b700e53c7323708f3c63e59bb1860f7782dbeb3d0a487869aa5f4ab23989151 +oid sha256:4abcbcd9ed97fc54df10216f947fd987bb4a49d0d9dd32dcb0020f5700e0505a size 6933504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c53a5ac284b649e893b+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_9c53a5ac284b649e893b+a02c3a36/wrapped_neff.hlo index 8a3086feba376f428e59e75565e23ec53bf01736..da9e6f6ac2c6cbf41e43d04854bdf148e6d137e4 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_9c53a5ac284b649e893b+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c53a5ac284b649e893b+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:699ac7e338dc0f77e05957f9d7f60b222256aea2924dc61b2903f55dc592357f +oid sha256:6c552fcec7dd7ab6d4bc6aa95ba48435854f7a8c7ced6894a50ab596ef3526e5 size 7117697 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9d52760610a001af812a+f8e6d902/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9d52760610a001af812a+f8e6d902/model.neff index c64a91bd3edecb0e67429d35330d7e0c16e8b16f..0ce3bfe677e37bbe8937405949e98969e95b3235 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_9d52760610a001af812a+f8e6d902/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9d52760610a001af812a+f8e6d902/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e884a06795f3dbe019d77779bf01db493d426ba8ec0ef1f7465464fa9d361a12 +oid sha256:0b32b26173495629dd59108bcb4f98b21a4b73aff9e6736bdb979ddb2c118126 size 646144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cd13bfe7dc30fce0c8777cce0567cad23bb90840 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f207cb1a7972380731d86b296124f3cc6dda7c189fa3be38721aed1d2d7b9fe +size 662242 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d91c148e4d2cf6bd06b190b9f92787bb6ee41bac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a17ecef0a02d21e4d5a8+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84945b4899ad47c794c1b06492376201f1e10cc50bbcf337a91d96f80a0cf278 +size 18484224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ce719c6f62ab5e0c550f12facfcadb8d9bf1b52a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7d10f169193e494212c9a80507b91a6f0e8711cb91d09e55174ab063280084 +size 1134023 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..03ead7b2c5bf50c9ad5ee1394f6709fb71f7b934 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a8c9bb79a74a03571fcd+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e83712d3a23c5fe3c9360a40afb42542e0f2d6544741945edcdfb0a0ddd76d8 +size 5141504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.hlo_module.pb index b928c9a2f728815784f9ace1a24d0c5aa729fc33..748c409b8729fc9463051767dad159bc8d1c7cdb 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f1d26ab78f5dcdc49abcc6e752412007c6cc8d21445bfa1a0078c7ec9c8d610 +oid sha256:fdb4342d18a3192874a17bcc4ddeadc592624a4db04b0444c931b0f2dc4e6d9b size 761066 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..428b10829bbbf55ab13c4b548f2a5f97a72e7e7f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d4acb69a20af13d41f020d46ed73cda4062df1a2e192366f1620bf4bd2f9ae +size 1160172 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ba04cb3b11cfe587fa97ed1ae0a817259d5ed794 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b961f4e4eaa0628af40fddc3c4446199f1be7517b91ea36e13f5335433ceeb6 +size 6882304 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8347118048374550ab1630764523832e51bd413c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1b042cf3d88bb04c2e0+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f7a390c3b8e80e4dce7cf3e2e75e34ad938c240debc85e59c2a5834eb80efc1 +size 7019361 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0a9b14c1b903160dacbab00e1ecbcff3e2af0f1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d8b1157113d573a3781169e7cd88fed1856fda6fa5282d3d403ce400f771294 +size 1145836 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2deb547e22b6982d89d827e1247451965b1e93a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c96e1f842a08248deb05da8989ffac9932be43cde99063900ae0ee29fc6e0a88 +size 2305024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f55a2e0c8f73d18679011727cc9c4db7fa8f98c3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b46fffa5d4b2e69904a7+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f87c491b0f75dacdee2d24db02a078543d5680fb176d01558914238b876023e +size 2442081 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..acbc6ad18373e64593a5c1ce00f951881da6e551 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4894daa7a63700186ae52e4113ba44c70532da051fb907e0293a1513b4a005f1 +size 1145836 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3f0db703e1ebda9cf4baed049dee73ae45f6f844 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba2af732b831da9ea85f28b840e50fa705fcf0d534e6f97ec96dc3beec36e570 +size 2264064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7330d5158c5e0b585eaf658cf44b11377d0bb185 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b8bea2e3eb787b70e571+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5c7546b02538f526325b31b714578c301662ec58038e6d18f9c0d2ccbe48da6 +size 2401121 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.hlo_module.pb index 2490c1fe7fef9d747c24ae4daa81eb948a183fc1..32942c066618e9d65cb12b4e62d9a5097ba42e3a 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba30bac1303e4d846c40c65f8e4d8a644794b6964038372ff73e155714762bdc +oid sha256:eb45ccdda05a2a2ee1cf89ec265cdb0766071dc0512a114d5a7a007c1d523b9a size 83504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.neff index 3612ae764be895e726e58f5ca7f02b1d988c5d24..0bc673303d2f6a85cb41332e26052b5a2a238a8e 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ba77eb824772e994518cb8cae28e31fdb69af6393a0fac70a28fb9d3d5f6d51 +oid sha256:36c9055608092f4be44de678aa77e873c858f75429d901c3851babbebe581dc4 size 328704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.hlo_module.pb index bc9cfc879012a007d2e0baa000a3b24812f240e8..28ee1398fee500d4fd4ce4bd55db6a4f4eb78dba 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:488ac1572ce8e1b1ff2d6254afaf39e40218d5b117f226473da52d9ac7929c91 +oid sha256:5d12d23545ccb2c03c3978d33636c9589b7b6da434697f7b5507f28fd0062075 size 81516 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.neff index 340e7cce3851d2abdb5ee14285f6645d1dbe5088..0ebe76da33da7f535289f868ecf2170765c2801f 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50269c93a4f89b9cfe1a88721a71f9262fee57c37777da36ef0127d536e4c0ee +oid sha256:1bede074a656737d1b8bb3a4c99933c04b3172880e2528fa29965fad3b087226 size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..afbe5b375a3a8009dab49aeb367e92c140a9b80c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0462fd44753ca049a7d1994e0b49b1abe859640c058d9ec134ffe0f7497767a7 +size 668454 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..91b6a3a2acb8ab094e50f68181ad85f90c7daf7c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1117e3cf2b2a7c7a0939b75a184f8724189b1538e95e68404fed1d7b8fe2d0b7 +size 25191424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..77904b171c7b5146c3d2e6f049bcd63c61520983 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b4f366b8d782dd4d1d1d3bb864ff53306cb866cc8a1867efc3fbb2aa112d8d +size 617945 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..78834622890200b3956caa04dc82a18eec841763 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a6408a53c92a01df4a363d200f1754ac088fb38e45c17f2e3ff5edd31aff30e +size 200131584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..173fc960bd5d13b6c96994f9414ebde5aa92619f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19dadb329dc3f66fa8b357582f7722e196e3feeab0223f141742859512c1b23e +size 1056860 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cfd370c35a7bac2e69ffbb0216b8c97ff6da452a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dd01d7a2913cbd8c1ba1+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d023b6fd2c6bbf87afbc5ec7f279c184c841ef9be6cdc8c1419c312399a795b +size 96932864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.hlo_module.pb index c8e158507cef0abd568e18481672a47be580132b..cf7107baae9339edc2d8373b74081ff19581db39 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dbee7375c8021657bbb337b137190e79d46ae75cf40db4df9597daa481da9e5c +oid sha256:1250088885cd64fc4a0be3a10d2f8222c991a426632b4d7a86c18a890d6232ff size 90382 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.neff index 638d8a898464a976db0604cd8306a7f0af1c1262..868cb15bc41071fe8d10f01f207a994c89b1e9f5 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:30550222149aae50b173e32adaeec5e384280fdc8f3e65aa0d0a6c4ee049106d +oid sha256:de7fc6b6461262a518d9b4e6d67c1c6fe943b04806e1f726deaf29d599cb1911 size 359424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/model.hlo_module.pb index d40b921f650e4ca344d330f95f6380893d402d2c..e273fd0722ce8ffe0e967376ff2afbdcca6fda04 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d9150bc5aa42ad4c1035fe623592bcc639869b51cf86746a0c48faaebc5f464 +oid sha256:2913e1dfc67fb201967593ca210cad72244a745757d1e7bc3aea7db3ea05712a size 388429 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/model.neff index 03e794c3f8b60cfdec2a621d757acaacdce0ff23..7eff559275e2672962b48b51eedb6a04889fab7b 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e82c64993b7dad469832d89ca02c23124828e74901a02e0b125550a097819e24 +oid sha256:17d7716beb9c13eb7c1c78f894b95a728e7e5328786bca494183ffe9bac29087 size 1936384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/wrapped_neff.hlo index 18cd72b07567e1657d1d4dba3f58621774ac4443..e384e93b0290a003cdc2c110e1a74f327ea3126d 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee8ab158a9c272b70f44+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad121dd45a5e872e2414bedf511fdd50223631112354b949c9c4709cacb17645 +oid sha256:e52f89184f2cfaf87162c3bfcb7b0f554b0030ad0c173ef1bb100950728fdb54 size 2029297 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/model.hlo_module.pb index 0400d3c20a9e3a798cc48491d3a15bde52b38caa..ecc3ca05e8ae0662f2cff8f22dbb020e8b85acef 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05a4038a70da97389bf754168583601f7fd3c730fe4edc619ed6004e6c5b4c7c +oid sha256:66355f45b5a564dad737213b1575c3673e50af834b71a571c765842a1e0bf372 size 700208 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/model.neff index 13d42efe82d80a40308378e1015073e7bc3b90b4..3137c2911607162b3709df115c937401fdaeaaaa 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:746fafd52eb90811cff39c5014a2c42a482e171dfbbefb934d746aea4b3e99d7 +oid sha256:2dab91a2defcdf6da09de04cd1e7c2f29a26ff093603f3c8557bc90132d43616 size 12411904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/wrapped_neff.hlo index 1864b508bcc63f62492c972c3e150fa504cb80df..6969511446904860e7649c42d766c4705a0c339c 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f2ba194e95963ca457c2+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:145ea9aeb136bfad8e0926a6aad9ede415dae6a4e80746db62c3c97a29eb1c1f +oid sha256:eb3127cde3d04cf5d9bc208ad0a3cc1b9e98493bbbdfd8ff1808bcd8a72e6858 size 12558920 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..aef42dbeb26b04f856f84f1bae494446196aa05e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:412d9298a727c88ef811809b3cf48655cd25ce8300fe430c2187da886fffadf2 +size 793324 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..6abc610510f70b376d8f5026c3ce1159e83d91ba --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.log @@ -0,0 +1,53 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f5f81a13c8e6671b9b65+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f5f81a13c8e6671b9b65+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-04T14:17:28Z +Pre-Partition Pre-Opt Histogram: +total HLO instructions: 5170 + convert 910 17.60% ################################################################ + reshape 802 15.51% ######################################################## + transpose 723 13.98% ################################################## + broadcast 547 10.58% ###################################### + slice 543 10.50% ###################################### + multiply 362 7.00% ######################### + parameter 328 6.34% ####################### + constant 221 4.27% ############### + call 217 4.20% ############### + dot 181 3.50% ############ + add 144 2.79% ########## + concatenate 74 1.43% ##### + negate 72 1.39% ##### + get-tuple-element 37 0.72% ## + iota 3 0.06% + gather 2 0.04% + tuple 1 0.02% + cosine 1 0.02% + reduce 1 0.02% + sine 1 0.02% + + +Pre-Partition Post-Op Histogram: +total HLO instructions: 4364 + convert 909 20.83% ################################################################ + reshape 870 19.94% ############################################################# + transpose 543 12.44% ###################################### + parameter 328 7.52% ####################### + constant 257 5.89% ################## + broadcast 256 5.87% ################## + slice 252 5.77% ################# + custom-call 217 4.97% ############### + multiply 217 4.97% ############### + dot 180 4.12% ############ + add 144 3.30% ########## + concatenate 74 1.70% ##### + negate 72 1.65% ##### + get-tuple-element 37 0.85% ## + gather 2 0.05% + iota 2 0.05% + cosine 1 0.02% + tuple 1 0.02% + reduce 1 0.02% + sine 1 0.02% + +Potential split-points stats: #CC 0 #AR 0 #AG 0 #BN 0 nClamp 0 +WARNING: Insufficient number of potential split points found. Entire model will be compiled as a single module. +No partitions found. Compiling as flat model +2026-02-04 14:17:28.019879: F hilo/hlo_passes/NeuronHloVerifier.cc:504] [ERROR] [NCC_VRF007] Tiled instruction count 7392334 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs + diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..406d26205a62d7bc1c6466b1bf413167a23cee9e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7969893711a56172d92c4bb78eee80280687230fb1c3c29db069f04bf97396e +size 793324 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..b3682c390188d6ce33fc93474fb72a0a54b8bcc0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fb54563033b3f69f79b9+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fb54563033b3f69f79b9+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [LUR015] Compiler generated too many instructions (9816278). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2026-02-04T14:25:22Z Non-signal exit. Backend exited with code 1 and stderr: [LUR015] Compiler generated too many instructions (9816278). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +