diff --git a/.gitattributes b/.gitattributes index 5341c59c9506d0a8cc7417b59538d3eeb78303f2..a3965be55eb530c2e60dae09d415830411e535b1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5215,3 +5215,28 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_5db2cb5268c1af1c38fd+877608f3/model.neff neuronxcc-2.21.18209.0+043b1bf7/MODULE_999e22b80b6484bfea64+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_e4fc31a8d9c144896bd0+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_e4fc31a8d9c144896bd0+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4ff566ae03f09752651b.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4ff566ae03f09752651b.json new file mode 100644 index 0000000000000000000000000000000000000000..742d1176330d794e400fc5e65ff1b01d04100a4d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4ff566ae03f09752651b.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/88aa32108ed4101d3866.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/88aa32108ed4101d3866.json new file mode 100644 index 0000000000000000000000000000000000000000..3fc4f63bfcdab569587ab967a1e7a991979e3a07 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/88aa32108ed4101d3866.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/a0f5e6280aa0f309b18e.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/a0f5e6280aa0f309b18e.json new file mode 100644 index 0000000000000000000000000000000000000000..51b7a0d7394469a91ef7886decf3c0d208aa3927 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/a0f5e6280aa0f309b18e.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/a17c0997f47472bd5dfe.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/a17c0997f47472bd5dfe.json new file mode 100644 index 0000000000000000000000000000000000000000..d2ed2bcb30ac778e034eb9f3d1b67252c5cee73b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/a17c0997f47472bd5dfe.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/b7af84a78116c62fd7df.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/b7af84a78116c62fd7df.json new file mode 100644 index 0000000000000000000000000000000000000000..58974e0b3b07edb7e483023ad6da443eae87a4e3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/b7af84a78116c62fd7df.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/c2c73293c7780c0f9ef5.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/c2c73293c7780c0f9ef5.json new file mode 100644 index 0000000000000000000000000000000000000000..2646f86226b0882e7d7cd12250dd2fdc16b8838d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/c2c73293c7780c0f9ef5.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/e3cc0e33d28db8b7f878.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/e3cc0e33d28db8b7f878.json new file mode 100644 index 0000000000000000000000000000000000000000..46c34295f66c09b4b5a3981e8c81fc2649d4ad13 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/e3cc0e33d28db8b7f878.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama4_text/tiny-random/llama-4/42056289c8690a12c97c.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama4_text/tiny-random/llama-4/42056289c8690a12c97c.json new file mode 100644 index 0000000000000000000000000000000000000000..70a31f04c18bc624aba704070d763fb10cd57785 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama4_text/tiny-random/llama-4/42056289c8690a12c97c.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/469f61d7dad2f014bedb.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/469f61d7dad2f014bedb.json new file mode 100644 index 0000000000000000000000000000000000000000..474d9c616ec68f6de7706f07e407e68145cbe480 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/469f61d7dad2f014bedb.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/yujiepan/phi-4-tiny-random/41d8000b51fc7c8f2ab9.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/yujiepan/phi-4-tiny-random/41d8000b51fc7c8f2ab9.json new file mode 100644 index 0000000000000000000000000000000000000000..b16ba10343390fe921f8480d73d5c609da59545b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/yujiepan/phi-4-tiny-random/41d8000b51fc7c8f2ab9.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/6c0ccc000aa577aa1f25.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/6c0ccc000aa577aa1f25.json new file mode 100644 index 0000000000000000000000000000000000000000..464a695c58a92026efb5b043eef0222a02f83d7c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/6c0ccc000aa577aa1f25.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/895e022ccf1f32a63b38.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/895e022ccf1f32a63b38.json new file mode 100644 index 0000000000000000000000000000000000000000..f290f451c3ba9462e3f822617d948cb684f30871 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/895e022ccf1f32a63b38.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..459bf94c6876c3ac04c78424abcc4f777f13949d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6c59baa5b152dde421f3f1e9e3f16f808025f1c0618fb8b2926234001607b85 +size 1302883 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ca4b5091ffa8353fa7a7bc282688b6fd2abb0dc4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_28d01c41effbb13bf546+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa14e30a4936f07a97140de050e459675d4fc9f777bf8d1ec0562ab5d4862c9 +size 646144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ff0e02ee01e12d4d6b319e6d55242ac3d3ed1057 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b005807c7a81663fa48c930a8a7dbf2921a609124c888e0cff2a467de662daf0 +size 2025695 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6bcfeff4517c2b77485b009f5e98f1d76883a3ac --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4d259dd2baf2a9646a85b04d2dfe1b883af2acb2e51efd91637ec56168353c +size 1025024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6c3934b6fe2d84aba52061a0726e09cbca14fc41 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2efa5a7cdf24fddf8392+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:574e10c9771029469793550328413ff4398ade0f1a5d1161cfee87bd18428468 +size 1044660 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..54e6858cf6bbda4853b9a70fd91832eeae6b942f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb157a44882f29ae948c11839e56dd2bde7fb8300f217bade959d561959b045b +size 1246857 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7038d4163787397b472f23df227c3dea08e56684 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3bcaf76326aa82946559+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49fd73a3d1a4aee83cca8be21bb42baef11f3666e4e9f45b358f2d2d165a6e0b +size 523264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be8697fd1eb5cdc081fd50c7e702cadf6db7bc54 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e31115037da788f075142249fa62389f6e31d8d99649b1dbf37505118ed381a +size 1340396 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..db4b7853f7408076a2b24260a6aab46c3e3718a6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a2145326dcac3bcf6e6f337e58adf2e42be0822d46be97c613f5dd7d123b582 +size 523264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0da8dd9afc79362bf4b7ed7c61b1c6a66add7341 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cf09670cc966314fbac+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce51e14a9a83bd7330558c180d36592790311dbb287438eb4118e5ed89facc2 +size 534798 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c9400ec44928e6ec00b7009fad4135d06b921313 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac081a7019c8c8d8b2ec2514f41ea1f888fcffed36d7987816744304e4135a23 +size 1284382 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c6712cb8bc3c2fb249ec1b27c073a080aaf8a0fd --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a284b85f1397d9d8a3188cf7e26f99aef4acf887ab0d473607f56e0d0d99c6 +size 584704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..563fe52fef28b87c7a2e4a574daebcf0b51b7b10 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_54d943c37505f10935bc+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1462b8427b3e8651acf481a93d6401426dfec69fc658c0028ebf7b97011087fa +size 592623 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..835084869a55d56bd7dc9f863da4b9a093fafdb2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88972d7f48de3b646203567d4c481a3fa70f4779f366432dc48598c2d4320cd +size 1128085 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6954c840a2377b7012c91d65cb4f1d2498b9e09a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4866b61fbb2f9ec06f7cfe98a222dc3c8025ac5a372f6d697b97da8dc6edff2e +size 390144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3beb52c8a8671a65f1f32215b21fd4a74ff0a22b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_58a213e4f2515a39a3aa+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71375dc4398c3af0120a4de37c55f1ce262af7a2f95509cacd495d02a7389c5e +size 398499 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..243fe6804de438394f130092f65fd1bce1c5fce9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_7ccfc1e9-2653-434a-b853-0db82da7e687/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a11cc062167102b0f4faa9fdf16388d72d9e0015 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4590b82eb167fd46963abdbb717cc18cb9511961a3393d100eeacdbdc45b0243 +size 11280 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7e435fe6c7692a46a317954e70059420e5f9b2e9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9adf8ac202de2700fb558b322d7d14ee584bac8e78958528c288aa5607e2a37 +size 1342464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9c680d14a242e85199116fa02551eb7461f6ddab --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6606a82d7268d6a927f2+77114d4a/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ca42851dafdbc1d97aab82a49c77c9da16357adc5d5a4c25104cbbcf9059a3 +size 1345445 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..30bf4eb8e0874b185e13c94004d8fc798a15fad2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e11615c205c78a7a77a12a35f7e7295030cfd3d4416bcb04a2b3754a8dd1a0bd +size 2081052 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f2a544192775e541ddcfe7413cf38731d0e60b24 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75c9a7a4c810a6349cce+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:187afadd078b759d8a9d622822f0b389dcc89ed246eaf3ed8c8b8b7461a293a9 +size 1045504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3294677bec31eb347e80e2304482f7df3d25170f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_ae531703-b646-4045-9538-20d3cc66a585/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8496781848c02ecd308b5b2c5a3c342b702425e7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044 +size 8979 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..147869ef4c840c5698472bb75e9e65395c99fc5a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bff8f3b30de36289956dde05bbd405147b7bcd64f20487cd8a747e9bd8c6e57 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d2589e9b8b843e5960b1a153f688ee1e50429951 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7f69a178350d6371c2e1+854ba76a/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb2b0005eebd8ac8a5bede1e6141c0ff0e3993a54c455dc4b276a36c260a9a4 +size 280328 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..59897bd765036ef87720c3a1d1544b79efa0dc19 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1928e5c07070a76467a961aefb5ab2da3f28b01b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44486bf335774a55d3ac735cf214f8b8727280862f8f4afb69b47eb6aa3371f1 +size 1343681 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9dfcc07089c09f6c320ca46b2da92ce24c9f413a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8276ed7562ea388112863f2a152abfb220fab2ad3835fb71c280f1189e08893d +size 646144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b8b73344e8159c244d3e02b688f4603212e5535c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_838a440f0ba94b475b9a+d0d57c8a/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:514ae49ce22a00aad23127ed8d1f2d2d543398d886758adbff2c092b2d987425 +size 655842 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6646097ff2bd65c0df62882cd1755500164760cd --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17acb24a9fcbbe4aba036a9daf80302334a9a097f6949e59c37137c8985e0a2e +size 1357130 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8849bb5117a35a59f7e38b080b698337bcfb5d72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8ecf3321018998eea75a+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1b8f53bbe5e8f801e0d178e735b629a035385ab5c25e006b1163a21aa168baa +size 605184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5f296e4dea7d3faf83b7d0fc48ab8730b457cb34 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c04bb3d9543faeeda59c2fbdf50be9fac8a4744cb61efe2f4027814d261a353 +size 1129037 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..19959108fa98e73f1ff3ec004097f95e0affbc4c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_920c248c8c17f5746461+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dfe3c519be9fb32b48b52eafad32bd30fd39f31a9e8a66e8ca8fc1e64f2f983 +size 431104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..90c01de768b0367611f9e5f8ef3e4aeca45cbb1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f40b0ee45aca1c7e21a0aa6a6a8e59e2571d9f35 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1702ec62daf7053b2dbd068c167f1422d1e605d7623be441ed8ed851d95cf57f +size 70278 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b96ed436580ebae617b461695d0acda5208f11f6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:671c0f3928be702cefb85f184aa726dbbf9efce9a249032939fe47bd952e4205 +size 431104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6292190ce253dc94723b50dc14ef7e9a6e7d7cc0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+0b01cb42/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a8afec2169e22653ed984477ea526178db2309fe0697c0e7b371cad8e8ee2cd +size 443199 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..4d0177bae4087c9af0cb9aa0b256577127345e01 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fff44d7eda1714de5aa1698174259cd223360a13 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15529732da69bc4f0b0dafd6aec35139c40308cb34857d82f399c6e3cc8a5b3 +size 103199 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3f13f1cb330bc0d23f34702e54fa7283222483df --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_aa028c0ea0061525edc0+be13b572/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392cd9748eb7c65da03b8f4f2b38ca84f71c637e51a06f0d7548b977f564cae7 +size 809984 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..a556a0c3f8e6705ba4eeae0523a3af763396f569 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..08334a05f8bd43bb172dd09e84e6b2318e1e3226 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e507196b5ab6487f7528313ae6b6d0ebb2e58214331bc634996f01749fdc080 +size 1371142 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0d346fb6d3fb5110bf5ddd7dbdfdf39aec757cb2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f52103be87a16d51075f+564b7b5b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8f72ffe3700724329cba725d22c2a33d3d0d6dde5391553c35c247625789bd +size 687104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..697bf29040f369ad05a7edc19f413577c9dd93cc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c1d93c70855cb7094e613180d32b90192444e9a900b453ae28170f823782df1 +size 1227664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..700636e815afe4ee15fd39120d986b6e2cf868e4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc90e2ca15c6787a332988fad327e0fb5ab14857fcf5bd1e771ea61d89d5992a +size 461824 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cde6e513a2c98cc10b5fedd548f5d1f298ba0c2b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48094e734ba250efe2a39756946731518d3027abd1057ce6e190af7decbfadbf +size 470151