diff --git a/.gitattributes b/.gitattributes index e4b1f4005773d0435776430179030f986fa62190..a0e3a64c1392de701e8e8b6b2219e4056f0041a9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3253,3 +3253,55 @@ neuronxcc-2.18.121.0+9e31e41a/MODULE_1bce819dd943e86ebacb+431f5505/model.neff fi neuronxcc-2.18.121.0+9e31e41a/MODULE_3596a0566dd0e4bfadf9+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.18.121.0+9e31e41a/MODULE_3596a0566dd0e4bfadf9+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.18.121.0+9e31e41a/MODULE_17de2205f692a10a3037+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4bd0c6ed2a9764168cb6.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4bd0c6ed2a9764168cb6.json new file mode 100644 index 0000000000000000000000000000000000000000..7f7e7c4874088f7105a5a569dfc49a6e4a1e316b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4bd0c6ed2a9764168cb6.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/58bb3ae501d4d87ab565.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/58bb3ae501d4d87ab565.json new file mode 100644 index 0000000000000000000000000000000000000000..2734eb040650ae63e63fda14310a2ba2315b0dce --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/58bb3ae501d4d87ab565.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/f5771202abb1a7ae2611.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/f5771202abb1a7ae2611.json new file mode 100644 index 0000000000000000000000000000000000000000..6b23e1e35fa68cc54a8f4801c2011b0a3967402a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/f5771202abb1a7ae2611.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/3ed9abae6f7f67d07da0.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/3ed9abae6f7f67d07da0.json new file mode 100644 index 0000000000000000000000000000000000000000..f686524a2d6a538c4f8d010f30213bd3861447f3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/3ed9abae6f7f67d07da0.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/42b299016a0208fd74b0.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/42b299016a0208fd74b0.json new file mode 100644 index 0000000000000000000000000000000000000000..ad9c014325f12bdfac6a43b874486321c9ee1c9d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/42b299016a0208fd74b0.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/c36bfaee43f6f840c055.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/c36bfaee43f6f840c055.json new file mode 100644 index 0000000000000000000000000000000000000000..f082ca228ebcd68110c3dd62b2d6c03556682de1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/c36bfaee43f6f840c055.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ee2a399906ca25519a40.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ee2a399906ca25519a40.json new file mode 100644 index 0000000000000000000000000000000000000000..cde00c7f90d0349725adc77211957349d85bfa79 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ee2a399906ca25519a40.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/350aa0c9a5de191ee7c7.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/350aa0c9a5de191ee7c7.json new file mode 100644 index 0000000000000000000000000000000000000000..5cbd62fac646f71d19cadd778e6448a358cce49d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/350aa0c9a5de191ee7c7.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/85ab3636ce10c112729d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/85ab3636ce10c112729d.json new file mode 100644 index 0000000000000000000000000000000000000000..31e4625e2af61a82fa63c3d9079f7a0178659fdc --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/85ab3636ce10c112729d.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/e6b337232e4796edc487.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/e6b337232e4796edc487.json new file mode 100644 index 0000000000000000000000000000000000000000..da83eb116375bf300e312fa4fbb5923e77a9beef --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/e6b337232e4796edc487.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/16874c09c9bed580256d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/16874c09c9bed580256d.json new file mode 100644 index 0000000000000000000000000000000000000000..bc5071c3ac3538b7f48eefd48e02e8796d89a7fa --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/16874c09c9bed580256d.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/e38863225742ea4adc6d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/e38863225742ea4adc6d.json new file mode 100644 index 0000000000000000000000000000000000000000..725ecb433acad65d2b2b0314fa07c9d12863d2c6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/e38863225742ea4adc6d.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/fecca0469b6993e5d3c5.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/fecca0469b6993e5d3c5.json new file mode 100644 index 0000000000000000000000000000000000000000..1bdd630cb1ea6a49fb173d05dbe028af004a5079 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/fecca0469b6993e5d3c5.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/34b1d02021287cfd46b2.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/34b1d02021287cfd46b2.json new file mode 100644 index 0000000000000000000000000000000000000000..bc96f49a413219618e407ec2ac08fb468624ac79 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/34b1d02021287cfd46b2.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/7ee9b03bb7a8b01f359e.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/7ee9b03bb7a8b01f359e.json new file mode 100644 index 0000000000000000000000000000000000000000..f5b291ae9bd22dd367f855cfda07fc4fbbc08cc9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/7ee9b03bb7a8b01f359e.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/e709ce08492ca8a65007.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/e709ce08492ca8a65007.json new file mode 100644 index 0000000000000000000000000000000000000000..4072010b23b7d9e60055ac37011ec8d246b8f18d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/e709ce08492ca8a65007.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4ef86bba6a55fe84f707cd4a7804eb1652e9888a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0586c9953cf96a4bf2dcd9f92fbeb91b5ee1d0c4a4fa4b6552ca650e05704245 +size 7099 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a978b7c3d2899a1190213986ede5056215566c22 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00333196986acf2eb79a+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..39751ecd4e4bd38a301daee7d458bec3012cb695 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adacc7e6f071cf30f81197d157059b22c0963bf27c33f23e9d9610917f32f632 +size 7106 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..839a53667f3f4f343ea297c1cbe012c4264c7452 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0661e1d8741e9ead0d5c+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b3fbe0ee7a74fdb89244c0ba48b1b0fd820d80bd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec162b6f82a677bf5bad8a8984a403624ed7e7f8405ed1fd2b89ffb70ba3af5 +size 46541 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..052a5fe066dbc0db1c032f62caa0c132e6d71ad7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ab9e2f02fa29415c2a584da7d3a0dd489fa46185414b6a6ceda1ff02a0b7e42 +size 154624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..20944d74b5cd685ec7b965885660f201eb827749 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12294311aab65c8b1e1b69aefdf4cc623e27bd53029218381ec6a4cf66f7584c +size 162285 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..feb38f9336dd3be69c45d0f5e6cacb6b71096878 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfb1031564220fbc15ea84f84f3e9f18fdf944af7f7d4ec99c1a7a5dc08489 +size 81006 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3d82017293d30b3ab28643e0391cf340c913f781 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89157420d03816ac2f9222a89bce907177f1a5ca48f1137c20a533f8fdfb10a +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..66e566163971ea284cc8f027409911616b1e7982 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f02bf62809adc55ce64637129b796e84a3032d0036f542bf94e0f21ec5512b0e +size 244319 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f612a99c5a5aa7011c67077af4159246a5b62b96 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed7e1763d2fde0d0bfdf8790eecf071cb45a47c0486921354c5beb46403b026 +size 11227 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..011b0d7fda00e58143e76644f75ef7ee615644b2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7620edb3b2c01abe336f904cd409b85732204a2bfa44a10ccba787a4a0b474e7 +size 103424 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07ea5de8d4ec025011e713c79a814335038dc333 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3f3c80a1f760b0f801aa56dd94adcd18f8703faf4fbe1cf4629086b0f3ced2 +size 87785 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..742722e53f4fd051a3749048f32e1f71c0a5aafd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bcc37564d8330894550bcd1c560c819f27452fc9a2dccfc5012d25347f52f7d +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..152fbaecc7d8e0da810d3f760b0e09953f45fd52 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b04b389baff4d431c49333fe1097c9bf015a8d8f0e9ab7b2dac8b6c470c50275 +size 79198 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..46bf13757be27f687533bbe52663054aa1449305 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c09b60673a5f6626e5e43ee91b19cdf385f2dc3113114ba3212f31b086f1fc7 +size 216064 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..aa33d546563690ed627a67157abce1c79029e277 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6979a724da90b05c60b1f6049e232b513dd03d8d1a531e9c0704643c8ca737ab +size 223770 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..419f81026927d042f12dc056c93cc96be9fba1eb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5674bcd68065f056894bdc9171746e715d51cccf06f6813893350bdf1f4f5db3 +size 7099 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..05d17b15f3dde62ddbeb6352d704c4fa97d8d9f4 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_266c5c386fd48dd67015+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..74f4328c100742b54a0da08568c623fdb58bf8d8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291711a5dd69387c68f5d90c715f4279533a4c872e22ca0cf368e4e452231d49 +size 81323 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a0f98f0612db6bf34905e5bd353636651630cea8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21068e94ed15da4166d08503c40bbfa65fb0b048bb3ee873fde12b233f4ae681 +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..39fe4ac1b075f968dea881ec4d317033acea029f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2218fdfbf71bf29afa0ed05cbd601bd253eb224b8b68b012acc92cd349a9cfdb +size 42250 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..02623580a92f2986a82d058e771cb3d0a5146d8d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f01ec781276063d1550d6620dffe2d414e727480955959ac21331622c17ccf53 +size 164864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c405456691ce1123c5dd9b686c63ac696cd4efbc --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf1d4d6bdee08e32a94cb51eb0d019d7f562f7dc69546a7cba310828a021e389 +size 172461 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c95e41c90feb941c61b17a5e9214db5132e2bc9a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93332a3023b3043f3c7d3f986b201671027d584c5c981dbd3a81f9b1c75b8c47 +size 7106 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1454977989c2fa71c93b44467908fbb896400e21 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_32b878f811b89114a743+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..357a09cc68ca787b762d016946e1b412289ddb34 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:785ffb37d0114be58bbff93596d1905917de36982edf639ad3f6b98cd5e43415 +size 82573 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..36c8e8e483c611c945a8f4104c982de48893bf5a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2aafae91ecad0ad9b888bcb669b64acb2929ce33065e9452bddf2d2d8ee4a14 +size 359424 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c93e099f8fb82b7e7846132de8c40ee41cd860f6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa45fabb946616a68886fe9aeb9bb07e2c6e43248917cebd742fcc759bc6ca82 +size 7099 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..168e202205361dde465a8b78023a14daa268811b Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_39930d0c2120289c9c3e+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7787c6da17981973a7adde99e32aa60596dd893e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a4785fcc6b48ba2c44706eb573703abfc4a7eafb3b15d451691d7a85fc85fdf +size 7092 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ffad384663ce2da0b032949452d86872fafabd3 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_40b0c067485074fdf48a+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..619394612635f8be14b36ee82d229dc5bff4337f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccbb8991ce4c08190c00f2f5df8080b34521b6b52fc50a720b11ab9f0d4965e4 +size 69044 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..692bc10dec58b206ccaa112ae651dcbb650b3cd6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82b6371144cf9c14b0241c348d02996e5997df9694353a59d7657009aa4a0626 +size 1168384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2cb9abfb8e1ee1c3a50d91035b5819017ac4f072 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cce13c2c8d6e621050c4f6d2a392fb09e41dd7942671696a8fe6a6738dfe181 +size 10362 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..95dc9d108b7a7133bb92bd1cdbbd17fb92ff31db Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c948ba1d275cea9b124+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2125dd117f8003a3eb43d47d4ebaf3b89e2a2059 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8dc89c6b2f228783a4b7a847c419d01d23623bb1c36c7346580d9b8f877269f +size 456914 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fd3f30a45f7c18cf8857511196d8e984c32d92c4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e6440704e13e17efd6a9115b3ee8306b3906f06fc8106d08f762dedfc26408e +size 31376384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0411052cdfa95b344818a8811a935c496ce9e694 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e922a688c4f6359c90d7f7c743bf6bd312ba9ad617231becf02cec490eb8326d +size 51946 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bd52c653d8f5762e01ade9ed2f069497869146b8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39311f8dea15b407c7820c48887e5274479782b917cc9281bbd0f759f206ba83 +size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d6a9f07914f6426dd66dd4a1b5c8d4772d0bb0d4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd188af2314d5a7eff13ea15abc949171cd513fc3e3d699549f36bf39dd1fa32 +size 195507 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7941df158a29ae97ca371c2f91f26160a2a6a7f4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce77cf1b3c5ea413fb633521a095badfa4843b1c0154ea2a41c0f3a335caa205 +size 81006 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0fe22e64e38d1b291a4c4719e59494e661f54328 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc6d1bcffc98a707063849584bd777524ab16d99a9b36f43a0c97a8b85341cb +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..91def0f685533e6ba7591af6311eedf398f790eb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dee33a8f0104c3005b67b596d1d3ef787e7cd23850f1ffac3bde7c6b31e3c85 +size 244319 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6018009af437dee58def76e11803ee14b43862eb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8595de39b60526028c04b4c6d00125d4521ba64bce786cf02368b76977ffdad0 +size 50076 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a989d8e45cdf8d4232e0c3fd3460bf86d69be29a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69ff807748f08a32821febafa966cdfe8ba5def5c664d1c6d02826fd2ee94358 +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f6f45609d733e98fa628e247cbf4f4c64040b3ec --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3911b9e19624e2d62f7d180667f966981db8d75fba37e5f97e83efb5bfe119da +size 87785 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..55268d0057a121cd68028b5e0ae16ca474cd9c64 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a1d3978e4569f994a020271ae0831bc5643a917923e45b4c3348aed583775f4 +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1627088221d0fa812c4a968a72bc1273c515f7fb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65b142d87d876dee57b5b66d54c21bf5a9bc73b3d63a32d4ffd01c46fe2c82e2 +size 80469 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..721c228ab64a1572b175cb9581b751dad03b08f3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5cb8e6b7db67bf40c1e0170697533b941b247d1324c5fc287abf11640081c4 +size 205824 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d74728c270ef30119d766669f27d5568f7d37d40 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce4e4745bdc2b2f666f9a371aa650307ad85e38548b8b082c3ee7cf04e28c573 +size 83489 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..72ee4ab9b23d858eacb53ec76014f8351f2bb05b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0224d883f8020fbb8be6d472705c13d81bf4f3f1a27ad3bda24aa3146372572 +size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ab3fffdfed99a4ac8d5d16069e703fa9e8f45088 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaf33660ad2b95a5db5021c668111bd025d45020cf0a23ccf8fc049154b661ea +size 193114 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2149c8b65934ee41538f0ff8bb1de51761e3f668 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9dcc2d291a29395a7d3b0ecd3c329ac16dec5a55d463e1547fdacbbde664e4 +size 10010 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1610224b254f298cf5f0e598b58480b909b2701e Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c7c536a078a2c0f91a7+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2d16df4d61f7ec5889998b2629acc4d936367f1e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f61eab86b4c8d3ff91c00c1739d222c4ed2b688e4625c4bd8cda0e80dd81d38b +size 83368 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f84dbe31ca5f0e7ee9964afc1d5c138ed65435d0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a833e92f3439f4b0359cb62519c02c51dda934079f1cf31e8ec2b8fb2f091c51 +size 420864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b3e09794147aa46cb3586f8df0df755a69e7d0b8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8c3291c3028b7cc44e790e359a20c4a356dc8317135a8920091635dc195987e +size 53803 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c9f55811dc28e3c37363b9de0fa5c277e5e69327 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04718f2195a2c1b43c763d1c97db3bdca3078632c6d0f8a05ee89c442b6f708 +size 164864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7a14fa9b04c0671ef37eafe614ec6a7c25b5c5c2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ee9aea0fe9a0404969d1d2793d43b4fdaf9f7455eae8e8d26f597ca0587e04 +size 83489 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dc0ab807fa4ed801936b627edbda12c79d239e49 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4041dffc23fbfabef08cc959c1cd21aed5b8ad5b9ac0dad1a8a4cb6faba17fe9 +size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8a814d634dfa89551774d83bcfc3c116ebeb0a85 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7962dec34318c62eafebbb03c003a1515a174f5348826c9b222ff2d8e3947dc +size 193114 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..47c37ed63a214364892fe16dbb18a26acb5c9542 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3bd2354b87e72d380a363cd1cfc99ba880da6cb454791b59fd1278345941946 +size 7011 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bf4c14732768a50c46be5b0cf9124efe046b04e1 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ac1c1cbdeb45a22e8c938abfda2a718eb2f8ca82 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ba6755f76044cc182adbd3f58b2f88e3647ebe64d521e294bee38f33cd7d099 +size 82571 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7a7036c035482c95fe179591398f40ab70c3844a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ed7c62c7882a05f20a82c1070ccebd3b96ebccab601e9985d0d9d470ae03a2 +size 359424 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1654cfc00a632ac0cb2fcf75f4d89dd2d5d75c8a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15a1f8e05a86a5c8f515e1c034a06442d19fdc3f1674c0fbf0e3499011b528c3 +size 83920 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..21da0c1439b8cfc9b6a7039588fbd4d8c7a400b9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d25a3804d66b7ff656ecf5199d0baded3622f17e660ef4f4736e8734c1b0938 +size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eeda5a2da7396c5e19acc89c719ce651f7aa08e9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:078b037ab70c04069fa78a3ad21ecb6ea967038e30333d61e6905f52ed3b8ac2 +size 80327 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fa00256331c0cabf14d56a463c8d19d3b5f962d5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf462cdeb496241af30d672b2e4f91b7541fd490d67691bee195c614e1fcf79 +size 308224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f767a06770c98dc1cce417af3b62edc988cb0a19 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f4b338c58fc56e5397fbe8016b2b3780f5a925cafeeb733a1e0554aae0c5e48 +size 319522 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bae7532e62f87dd96aeaa2c2011979cf73338534 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44b160039d090f8d029b08fb98b9ef3a9477eb4389b4b08b4410dd67a09e9af6 +size 80469 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a1a861c4ace0aafa314e00c9e59e651a373e19b5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4972af6f4c52b9f9229ad3866e5d54a272391a1839aafc8054159655bcad7619 +size 205824 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..20009efb2e8508fbeb58fa71bb0e83bcc2161b97 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e251415128048f19616fce8b11a16250dd5b8bbcef958304e73dd3e06731decd +size 88894 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c15965095c8cba5111015087b355d4641c3a756c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac1f5e0c4b538ef4d33b7c9c03dd4d8c636770d739a6395af324e36dc2a9fedb +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..99896f8625b657e4bed8991e55de90449186f481 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e47c8d6011c2921e8cb2c32b61ce263b5c06e42a22d81ade3f0e63d0c49776 +size 247153 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..622ad1b68e49ff6ea936e0013ea8cb4aac059ced --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a72f28e668e46cd358e14bd7129f7fbeb247f20244f75febfce5cd0658932f0 +size 11227 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..089d2fe0f250f49558924ccb8fdf25d122a634bd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b6e95760b38f249119c0916517c0021346d8c7908b5566fffa0ccb5067cd11 +size 103424 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b4e6c16f42b5841a3b416a34149ff1004ecb23d9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78e4287bfc92bf850eea4d10461af2352f2edc23a29ebe6782591e1b850f075f +size 83920 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5c576b737d9ff722a68ce0d0067e30e98e735637 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:284a5071c265c48612d234ce96a5c2855f346de8b297018766db228865277ee9 +size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a05db28aa1daa64e1e6adf1788a0b8c7cdbbe8bf --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700fdb2999174103945b216119818f393e43318102778ceea42408cea535ef67 +size 81323 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5bd62aa52442ddca10951bfc79c87e8e11226db7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57afc6cfa83683d461e4e7a29bf91aa41b051b845bab370323414a6a7ec44a6d +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..92aab4de6e655e40a4edf39f10cf845d8a241029 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a3af38dc3759a0077763bc94bddf02381e1b81d210fb62517f722b06bfc5683 +size 44058 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f0ad67ca06668adfbd0292f90c3ee8f2b47f9816 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42979cd0ec9a5f3294698a306d1083ffe6c98f8a98fe89bc2760dfab9097396c +size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b248385fe494ffc7cad59a16508e903029cff1d6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5ee20aa4f1f1686fd0bf72fcbb970e56e5afd7f78ff60bcfed24affa4a8fb80 +size 193010 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e86b6b5bd3a002008976aaee568fc57d8363e111 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3cc9f83f80d94830a254502de2641b637f314d2443dc11e2df3d9febb57c81f +size 88894 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..804e207e2418e3f4545d81d6c5531911c6bcbabe --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b633e19f1762738f127621ff50d126405e0aea355b8abadbba521b3a4adcf4 +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..be98b35c896f791f6941cda19904560394773a3d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44e64ac581f0a7cf925adea4b412b0e40d764dcb011500119d451cd986f5d4eb +size 247153 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..88035493eb87f34b119a4665d937e6a27e17a714 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9e74f6e05b9bd84d59addc9f34ab7eef94e63734f2f1114b01407379ab01ac4 +size 7011 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f6c9c6a98043d9e583d7703d88c6b5e9129fc399 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..22dcf0f8c79ea418cd5ca2568a4ae6f6315ccc56 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8919938a040060bd5077eef2dfa9d7707e8483b155a3cc2449f75b6e5ccf99f7 +size 10362 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96d2fd66b7b404052a3f52202f94fe5485d46ebc Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cbd29f12a931a6034fdc+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9c74f483559fdc81893ce15a2c0a9fb80881201a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f54913e8a38be19784fc08f94a57846677f6300bbc16748133894322a64cfc9 +size 79198 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2d43f71f7fbeffaa6ff9deaf124ee62df29ba049 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b1ebf710d7cc5b0de84ef8e7920dcd8eb62252bec30e4f2e0da985dd31de9c7 +size 216064 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7c4d2866082836642c5ba9e5cfcfe24ebb8cf7d6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42680fa0b68c035ecba291608b9ee78d9dce3a1d8932fe01a4eae9bf2d77e4f +size 223770 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ffee70cbee83d12ef55567d18ac2f1cf7fec84fa --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c0186233e1e8fd262423a787edf1f033c3e1920648cac218a2b922f9731f08 +size 11183 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d67d9acd6b2a6fbe231449b40669f6c331329d37 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12baa0e6a5c8ae2d90ebf56eaa45a53f8085068dc450b5613927eafcad8e1350 +size 103424 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5f147dd1be539b023212ad611bc29974c4c4e0fe --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fa1f1c8aa58964fa1c21d32c9f6b27235496ca5a3c5b0971eef05f395e17244 +size 374859 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..039a66e561ea3ef85fc422e11159c84552cbd1ef --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90011a7b74cd8cedd21beab5f0105b67a975dcd1c99a509a7b76832aa6c4c8ae +size 2151424 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..110a46367a74635e4f2ec80e2b4eb234e05c4d8a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:930529517954a020d452a7d3cb9ec3fc249379cf44b05340e8a7e2993dad6a19 +size 2221053 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5dfc031cbfc50005e9ab3569c0954fbad0e1f3de --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59b4d819deb1de11993c4781088e8cd90b8ec691f7a4bcb00c0c1c257947c246 +size 69179 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..63d3a49485f4766ae4f97ac71cffee3dc827081c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7614258fc592bd21a671fc29fd833fdf6321eab9923b28bdaa281a03ea028aa0 +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b72fd7079202294281d082b853096297c8c1f04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0652c2837255ddb07aee5fdffebf5b8ae68da37845f1e83aabe3f152b2b40db6 +size 258114 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b45c639e4a7e818e4d96f69f71149d19b10389fb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb25c443d6cf2fe2e2ab55b05029569a1aa3e75608c0634a8650533b06c46b4 +size 46622 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9e203f494f922b8f4690c863f764105ccdf6534f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23194ec69f19c8f3023e75314b47e31e462b52c4e3ad5482c7c5871a4fce399b +size 144384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..14baac76b252c4f5619625e0bb51aa8bcef52a52 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4135cdf3a474d12793ab55474180091c957ef032abef7de650b0a277cb2048f +size 47478 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..83b0124ec96368bc29408c631698f5485dd1a727 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b6c609b2232870f2babcf7c062f4312c306563a2d58f3f7cc7589af54486206 +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7ac17f6035702ed025e259a4ab0b6a66e7b9dac1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6adc0afbde64c61bc0c59d708742998d0ed8cacedd763d5b897f26939ef4e46 +size 7004 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3811d7917b56c964461723a18a924339eb13ffa9 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ed6180267143dfea9183+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..91cdde0a25ba1df9f098c32be00988e47cd7345d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:339c23c9dc9b06cc18d3ddc6610dd04cf812c9cd2d347195b70f06454ce70ff7 +size 69177 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4237d3675e8adc880406ba92a59b454ebffd67c7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0257059d9e332b0a165601211a0f5a11c8471c1f55ea42fdf23e2fae04d278d +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c74a39bc36ecc53c7de6692932a6e7762b124356 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d618b554d92f95543e636879190f32bf698d8094c33e0a648cdff00e7a4e58b +size 258114