diff --git a/.gitattributes b/.gitattributes index 9ecff19ebb08c09691a078e5449cf5c0acef3d90..09cd892726987dd42be3a1db3c7f7aa7993b8bc7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -12804,3 +12804,43 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_1280d8f2f3240abd5752+a02c3a36/wrapped_nef neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca0b7d2c1246aa065a0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9eacd4b781267f13a69+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9eacd4b781267f13a69+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/d337db1f2945982d0bc9.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/d337db1f2945982d0bc9.json new file mode 100644 index 0000000000000000000000000000000000000000..4019d208b4f00e2ebcb5b95a7d6d5331dd11969a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/d337db1f2945982d0bc9.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/llama/llamafactory/tiny-random-Llama-3/ada00e76710d2d884cc8.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/llama/llamafactory/tiny-random-Llama-3/ada00e76710d2d884cc8.json new file mode 100644 index 0000000000000000000000000000000000000000..5233cfa29d6bbe17f99656f0fb025b7bc8c4da99 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/llama/llamafactory/tiny-random-Llama-3/ada00e76710d2d884cc8.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/llama4_text/tiny-random/llama-4/caf8458124c0c1b8b608.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/llama4_text/tiny-random/llama-4/caf8458124c0c1b8b608.json new file mode 100644 index 0000000000000000000000000000000000000000..46fbcfa4774fbb366e85a67e9f8a9e47f7582fae --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/llama4_text/tiny-random/llama-4/caf8458124c0c1b8b608.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/mixtral/dacorvo/Mixtral-tiny/9c1a264c4d265060e661.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/mixtral/dacorvo/Mixtral-tiny/9c1a264c4d265060e661.json new file mode 100644 index 0000000000000000000000000000000000000000..00a1621befda3c7d10812b01a996ff15cf0ebdc7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/mixtral/dacorvo/Mixtral-tiny/9c1a264c4d265060e661.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/phi3/yujiepan/phi-4-tiny-random/af5f9033652bae57657c.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/phi3/yujiepan/phi-4-tiny-random/af5f9033652bae57657c.json new file mode 100644 index 0000000000000000000000000000000000000000..6f01d61b989365c9ce5ead26fc6700bda74a87d5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/phi3/yujiepan/phi-4-tiny-random/af5f9033652bae57657c.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/50dd484535c9c8562031.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/50dd484535c9c8562031.json new file mode 100644 index 0000000000000000000000000000000000000000..a572f5ac3c4603c9a5f0da4a35eb1a16d363adb2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/50dd484535c9c8562031.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/040b1e23663eba2981b3.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/040b1e23663eba2981b3.json new file mode 100644 index 0000000000000000000000000000000000000000..c6b9445e7e04fce43b67255375101dda336bf9d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/040b1e23663eba2981b3.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 24, + "max_batch_size": 6, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 24 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/6f5f2cf26c21b525ca82.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/6f5f2cf26c21b525ca82.json new file mode 100644 index 0000000000000000000000000000000000000000..75b8ef70669d0bf8af668dd9dbc67da47647e4dc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/6f5f2cf26c21b525ca82.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/8f2f6fc022fb92a08835.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/8f2f6fc022fb92a08835.json new file mode 100644 index 0000000000000000000000000000000000000000..e20956915930705e090d84a283ad74cf2951b4c3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/8f2f6fc022fb92a08835.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 24, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 24 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/922b8f110b9e3fdaa766.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/922b8f110b9e3fdaa766.json new file mode 100644 index 0000000000000000000000000000000000000000..a52aa077e4128f03e4957bec8ed436194e993498 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/922b8f110b9e3fdaa766.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 24, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 24 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-8B/3c013915c647bb8e6712.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-8B/3c013915c647bb8e6712.json new file mode 100644 index 0000000000000000000000000000000000000000..fc9257bf47a2673fcfa1b8741d8a42eb0bb31ad0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-8B/3c013915c647bb8e6712.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-8B/b60a4e37d9a2dbabe961.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-8B/b60a4e37d9a2dbabe961.json new file mode 100644 index 0000000000000000000000000000000000000000..7b717ebe5f9bf604f9b98b02a7f8928d1c5e02d9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-8B/b60a4e37d9a2dbabe961.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/39b749f56f6251d4327e.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/39b749f56f6251d4327e.json new file mode 100644 index 0000000000000000000000000000000000000000..89a055b913505a6a896de6ca82f4a29515f70028 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/39b749f56f6251d4327e.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.2.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b4384cef07218b1c31307e7b1f0d88e15f052dce --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7607df5f2c7c5132d87730619aa3366ddb2035e223a239aa5a14a1c9d7469d57 +size 848773 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..53ebea2910a249a3a5cc978a22d9b51771f9d7e8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d799faaff5e25a7531d12f187cea88faf79b29e045c2b88d2c3ea0b8bd94cab +size 5786624 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2ad69cd01383a682715fed6c7faaf92a72533ca6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b6d58bfddea9fff9d33e332a71aa7e3df064e510 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb0a23b716925f843a16e15f9f0385f067950c8f9f55cd7b3db583ba03b1a7e +size 84807 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..65c3e0dc513174a5405638385f4e4fd1b6fb4ca4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f26bc2e40e6e21bffa40348dd28785a0c5ba6ae26126553c85435559849af5d +size 646144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3b28dfdd4b213537b2dea5c6bcc967e9c9762156 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d61ac2165fde7772e8d3035b0f89b50beec5e0d305548cbacc624eefa9452d3 +size 82753 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6ff7e2c0315aeb33b1e8ee2fe5b817ff3f76d8a4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690971e38e78e6e8e2e0b2249904244dbe845f18a1d36cb2b763deac5fe550f3 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0605fedfaecfe63a3d5054643fbef3013bda8c52 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:937aba417c7465954a0bf71e8d536e6ce2db41d44b477deb724f8f6154f76e19 +size 285854 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d2c16ff4a7ab3bdb7b6f0d5f1c453b7e5df4a200 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:949343b64aaeb75ac893058b4181a58759ab30c14ab879041244bb9af4356695 +size 850786 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/model.log b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/model.log new file mode 100644 index 0000000000000000000000000000000000000000..fc2f64a853b59fb1145e8759972e18fe8b3014d3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_315d9491c827c077575a+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_315d9491c827c077575a+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (19.257GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-11-07T14:10:10Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (19.257GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fed7f25cf4b93c6a441366dc54cf68d43881264e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34e6c39ae5aaa2365e0018b111269da40c42b131faed18711a8810bedd68a436 +size 429397 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c6eab1f1de4334b96862689fb30572e9dee901dd --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0135e8fa145738be49848914c2dbb2c52c3854a2403644f1842244e432ed0fe +size 1352704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be8215e4db02a98e7c7d9a9b026367f5b42a91ea --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20ed0858162f8fc2c58bc4c23d0925dbeda5897dae0889f87758206676394f4e +size 81843 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..394c147992a80dc61c65cfb271c654d1852c968c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adc574992e72adfe26c916de4e344ec745c5f0367620c1183437c3127f9fef15 +size 246784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c81f0318b71b9bbc335a042b83a9e87ff72482ad --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e3995a13290761f8f6f89854871f6a5da48002e637efad7b67cec72e826cc1e +size 255104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3279f1a5cdfeb71eca5b03cfcca5ced0696022eb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a36ee034700342382b6da2932f74a5d63b34f8b66d47da8dcd5b0cf42842dd7 +size 428915 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..af5eda3c5dc6d1cd509028962965b8a8a8c6e383 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2978f644a5df5278f76096838837e039d1d5a3dc2cf4e9c2a6cae1784f9bec0 +size 4199424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..64028cc78541c1833466a343e251d2e19d025203 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e70386c0d4c7acaf312dfe4908f119fe46d817ab3d82394d549c5ed84707d551 +size 80382 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..71a3bdfe44c2c40ddc76a23f4e943654c9c4ca28 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40684a5208697e733fd92b531e426c9a49e2e2f3dce8fdc81afe434818f265f2 +size 216064 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e878da9fb5ba13604b80452f8837abbf73e8bde8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e490cb2df34ae3582e58811ed1a9ea678ca780e9525b51113490839860539904 +size 224412 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..04c1b8097ceff1583e5730189770e7b7e79e8aab --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f01851ab7ef0b0809a16c7e907029400056497364b603f59ab2c9f38e6cfc9d +size 427538 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0a28a6108e524e6aa8bcb7501179110d0b35f10e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37d530cd99cdeac86375ed3d4ee421b5db73c2cc8ec537732e3f811d227f4c1e +size 4158464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.hlo_module.pb index 46b8b3b8a96ccb8b3aa37f349144eb76b364d68c..062611a49b7db4bec1f9381c667ddac63fccac4c 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d216c335ce9db74f5c3c60a0c399eb21a93c67f5d8b0652b3309d6b96a84777 +oid sha256:87c27fc5e9a14dd0d728587d4b5e633e65e6902974129e23c485ea65109a7a63 size 1061093 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.neff index a5f233214281336997f9b10a10f998915d720aa6..d7aa08d1d2553d9e1872ec1ccfa5f68b218d7d00 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.neff +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7d4d629dc75429b7c3b6f124b9f006925cc3ea34ea195858dade488d3dfb65a +oid sha256:13ca12a9af7e210d81d5d48764b74bea3ad2c8dee1d5f6ec04b511070e5aa4ca size 9473024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ebb9e007a6c5445dfbde4f94a2ec1f094ad203e3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd6b6bf0feffa5e590b798a1c60710db88bec1cb49364a0829ce08a05370a91d +size 435194 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c6718c327efefad931a41f4e9c641a143705f64f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76f32b334d4b29c0442354fbb4970d0103862810dbab9dd6e1d90f2943d2fad5 +size 4731904 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..a7a40c022faf4beb0fb20246d9c1f9c8f32c6900 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..727fe2caac58854e2f659732c11b6b5473ac4505 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6596d3769509c9146ae4a3975d1c70c2eb236f1527bf8cab5cf9306c4300289f +size 89555 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2916b3c791c6e86e6519d9605dcbc80ec5f2f009 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d1a66ec4620a7bbd95ddcd3f0b8563e7b9fd48c9167bbb83e09de5ea8f2045 +size 369664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..00f1bf5626daa38aadee5cdaaeaf15232e382a9c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d780e7908fb2043598326a014149c1adc7cf79ede14eaac7e57a0efd5aaaf1f +size 379362 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..82b6d63ef07e8b99f982e5b2a216f4fb928ab10b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b57a81178b87e2fa8d72c53f158790060e6f323a7ea625353c0c2ecec75b33d +size 694128 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..076477cfd754342b7fb936e67752c55b6434393d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b94b84c6a5ac7142334a119a16754827bff07c49ce7dd2d1b9f71d6c2d29a755 +size 625664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1105808fa0097ad3d66f3f2bccc9b12bc39f10e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec5e1a252b77227b0d8bf55d0ad8984aaaf05d34c12fbff567fc48bf7f1f98e +size 431779 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..54d7db4509903e9a8b66923e70feb5174a92a3e1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b15884b84559dd7029720a5a83714e250e569eee383b805015a1b0a990e58ebc +size 4158464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dfff463578c0ede0890c1f13293129fdc22290ed --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af2b03849ed64587b463db5b9e3970e996aacffcf09d636fc06eda4d05a031f +size 428976 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1526b27eea5d148e8095b871f1c65d09d5f4e604 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:820f5db477d8e1b241e0e47bc41dd45acdd9b86fe4fe58e6391e6f3879f8370e +size 4199424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0bc18f8dc18a593e9507320160b098df0f2d7d3c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9abda4bd7989f2749cf9583868302f42ca315a2e3de1183197eb1aa8d33ac28b +size 428915 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9cabce9bd8aa6b1f4485daa16c8cfdd49b05b198 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ccdd6902a5acc60bbf9dea8744c143fbcef71b442b84b18a2ce2d7acd752b5 +size 6237184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..acf3f511b6ffaff7078cc0c892c0083f21e7e91b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a22b54d714b2e6fd73a96a878615820fd25ebb97a5970c04db7084ddf50b4b0 +size 83504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b1e1ed26daed4964edcf7abbaceb23ed14c61d94 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a60ade2cbe5d1ecb7cf752a53cdb2fb8394d10759cceaba09809d3851485a793 +size 328704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2bdf94343deafea1a7dffee349a18f1ea4860829 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11caf63cab9ec2a78c7116e6e762873e1e1f1b3a6a165b692abb1923676d1796 +size 587842 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ece78e2ff471d162cab2be1e33eee356b6e7c336 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9400060f63d2a3dd20719706b468bb1b0dfc0f605a96b1dcaa82143d7470b1ff +size 809984 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4c2121746f92b747a527e0710c50fef3fcba5e8b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b23f902646a683f981d4529dc0f2c868f485a7730be1c7b355a33c07c6579df +size 944468 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a38bf2241a80d200aa33+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a38bf2241a80d200aa33+a02c3a36/model.neff index 143f9f37814baf053587daa6979da086f6f2610a..9eaf5a55d4cdad22266bbe4e70ab16ea0e142332 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a38bf2241a80d200aa33+a02c3a36/model.neff +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a38bf2241a80d200aa33+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68394af2fd8cb94a09b691f624f0ae14e54c0e0f34284e10a806089f0a6562c2 +oid sha256:5063b2ccd377fc1a01a8d3214378204d42f443cf95ded9b30e6dcf73c5ab1de4 size 6769664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a38bf2241a80d200aa33+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a38bf2241a80d200aa33+a02c3a36/wrapped_neff.hlo index 950abcf7fe26bebd0c8d2ae35f1a8d52adae0603..ebf3458a52a22aee4d1881f990d2f936fcddf47b 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a38bf2241a80d200aa33+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a38bf2241a80d200aa33+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a14ca808b2720af118f3065ed7f2984416991d63fb6fe2d95655f19844b70bbf +oid sha256:3d7c13e30d0f05134864003d0128a911eb9ff555ba8e13054cd0fe96717f77b2 size 6936331 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a702af93114f0c784c5a74feba6807609697b4e8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a79593901669a6a26dd4d0391b98b300b7064110aa994f0e322515227f8c944b +size 433643 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f09f342baa1fce2c605e4d3534661baba1de0748 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f21a6684c188efe851de16007f05def2751732fd9943f28c1aa2075b28e9cfb8 +size 4015104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3d7d85cd3d975162683053faa369dcc6259604af --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2117dab3e3ed8caad749b8256ab430c591d3b72a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec57d9d2f9d9be197ad88a58ece6b14b0ccab97c121b8d20cc9512558ddf6562 +size 70276 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b0102cdf1927883fe3234477be913799074c3dbb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76eeed1f996721edd4d90e040c8ba23e123b944dd2e18633add5e9705083e6d5 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4b52f9010eea2d4081ad81c5c71223fb8c786cb2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b92ec0c7f07df4aa9454f74cd4630602acfef75330bdfb6ed20fc99fcc27b9 +size 289571 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8938038613a8390b96437286c40a69070b6bee4e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8400d9b46cf0f503ba7e4251ae60b1ff81a266ae21d0e312c5cfde66828bfcb9 +size 617833 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8dbbb1ec68cf06f7a2cee8a4d1e509826551af5e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6673718aa1e2786568db5d56ee245a5bcc6892a1c108054aa1f69c5c75c9005 +size 69643264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0bc18f8dc18a593e9507320160b098df0f2d7d3c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9abda4bd7989f2749cf9583868302f42ca315a2e3de1183197eb1aa8d33ac28b +size 428915 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d43d65df74c2c0ca3a1bd54df6a1c73805903960 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e60ed421b4699c3e623f16a134714d00334237d986474b9171632864df0258 +size 6237184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..58fbf6b9366d9e28d7f19321e85acd4cd96c0614 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..736f69c82c1268401ffca3bfbad76251974b5f45 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f167244b4e5955198c7c0ca61b3c87ab0806098bb2885eefc0395531c74cbda7 +size 97794 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b6a59d8f8fb67662b40b9c852e67e53a30c1c67c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:902855d7c01e956cebd4ee97d606374819e3d1772628fc92189bd7a1e9c61f78 +size 410624 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cc2cc54570148e0c14230ecbd8c75282712e85a5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:493e092fd95afacc57041181604c2e81b2ad3acb7e044553bb6499c1c7074a46 +size 429684 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d5ef03cc7302d0c426b071ddea750452fd8b8438 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e99d916b563747619fe45f56dfcdb74d5c21b1820ca5e4cc419ac08abb0c7838 +size 4158464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7469a9bb5988ead48edbd119f934486cd88d4f06 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33a46b94530dd2e9f257ee044a402b122ad73ae7c96bcd4ef5ecdf7e100e2b55 +size 850786 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e06de99205f7d2375c50fa2896910e87418f9fe6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3389b5091ce04a415c4b6677ba5a31c43d8b7aa1e44672df129e2ae134af123c +size 82637824 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f9aac85f572373187ed5932ede47f59a7a91323f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0902b9467e36affd529236be99c466bb326eb59e56e6b6414b2bde17d7913ac8 +size 676474 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..be005290c62f5dd8c3ac7cd53a782b00f0987b59 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bec5dfc6b85bd28401f2ae9ab0c8ec90e2a88bd02dd3c576d6deba9f2537c5f5 +size 543744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..85d9b04936b7744ae5cc0ccb97e8b0596cd86f91 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61a3b4c4dba6212222c9cad3593be877e5466393fd035ad5f8cf131926c2c57 +size 563380 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a87347e585e555545a4c675cb4ee611f7a685b0b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d09f90f30891e710663ebed6df45c1a6f95b261a9e7bc53dd0bf8fb37cab9d +size 82772 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3d30f59e0104a26d36d46bb78be39af3726702a7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04f04de137a9197b2e345c6e043d111f01bf2b2d5a54fe7ecdcce559bed09d1a +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3279f1a5cdfeb71eca5b03cfcca5ced0696022eb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a36ee034700342382b6da2932f74a5d63b34f8b66d47da8dcd5b0cf42842dd7 +size 428915 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1891ba4405f29ba86f01091c8bd6fc9e2af7b929 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc597f6cd2a406443f6c3521267e9b1a415ffc3ef09be331d9970f369bf82986 +size 4199424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c6c36bcec75732900885291c9c29ad9273d911c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd02cc415a9ec23bae8b8c33f608759c1acdb1c1b1d00c2395f02013619bdb55 +size 558318 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dde1c5632fa6ca476710981cc6d7f967e247f748 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb726625781c2508bd21976895f5dbf613a064b512daaf299d84ac18963e1c1 +size 4588544 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6aea211899f1e1bf517714fe32ae73054c23fc0a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a9d81b45439b73d61796a2819b130dfe5588da28743c6dab28e51b8d6b7b92 +size 445260 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..048cd243ce0a708aa8d6dea85e098883f86b94b5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f02ae0cc9a2c14db262999a7fb54bd7a3345350b4866654843e6a5e17b2c5197 +size 4127744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bf00ca0b127128e16e4289e4fb5c4a4b3d0feca3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9cb3b1a05b77d5bb908b7d9a5344001bec8bc04360d230f2d8ba908ed19896a +size 81516 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..36e2fd2e7228867478301e210ccc4b5d287fbfe7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ca4a3cc0e9dc7be863a8fd62530378476d4a3010fd01500b88bcf19c02ed78 +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fde266f24753492d595dbb2d9fbc88c2a3fa7f78 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0c908a5ddbf03970315b73168cfd6d3b14f39328f2b700c5d1e9c4144323034 +size 91147 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e85f5bf4d6c2fab180a804e703433d283dbe9551 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9a7d9b1ec1b1e241247ec4c7d5a966b709fd0cf6a419d7ac3c6501561efee4 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..aed0aebfd9079d2a7921fbb1ee00ac31cd9f7d1f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eaa781288c2e1d7ed527+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1b44dc49d450390d788fee69a2e3b4b61b3e79956d4efe89ed6fef696940bb +size 289031 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..83719a3bcb5b94c8f07cc9b7416b52508aab2a87 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b537aa9235ac17ac4afc50f42657b98f27013072bb50aab578243173ac38203 +size 482166 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ccb4390e799039b0be36424d668044de8c5878fa --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b9ce48f4cb562ec2c3dc68c3a904aad2dbb1c0703ebbadd6eeb6d0f5c771fc +size 4199424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0a534b6f9b9b13756a0c296de4f4491ce2cb4115 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84cc69f930e3e2d6aac7b2581033933d9d5372ffc238b94f014e0901070af434 +size 90382 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..34ac8d11ea71b6492fb8ddca287dd614b02b1f88 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fd32caf35dc9cdf42fac+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e96e22e545fc320f415061f613201ac019eb714022311d84a90adab4f261c1e +size 359424