diff --git a/.gitattributes b/.gitattributes index fb3691cb2e2844dcc477e138fed566b94d933ff2..b3b76a9138cb20b82fc243c1e642ff9d44536f79 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5366,3 +5366,24 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0808787f3555a6627cc+24129607/model.neff neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_a38bf2241a80d200aa33+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_a38bf2241a80d200aa33+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/aad2db4f9bddbc472828.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/aad2db4f9bddbc472828.json new file mode 100644 index 0000000000000000000000000000000000000000..05cd22fab9d7d0aaf9c71084b753a657c1bd0b1d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/aad2db4f9bddbc472828.json @@ -0,0 +1,57 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/64a95378bbe80f0866b7.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/64a95378bbe80f0866b7.json new file mode 100644 index 0000000000000000000000000000000000000000..5afe3df8067138912824815390f524eb6e2d6bb1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/64a95378bbe80f0866b7.json @@ -0,0 +1,61 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama4_text/tiny-random/llama-4/630c54595c4665964dea.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama4_text/tiny-random/llama-4/630c54595c4665964dea.json new file mode 100644 index 0000000000000000000000000000000000000000..bc61510b4d23ed15a39530df41225fe8fbff3a71 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama4_text/tiny-random/llama-4/630c54595c4665964dea.json @@ -0,0 +1,80 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/f0d9b6c4aadbf6ff1139.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/f0d9b6c4aadbf6ff1139.json new file mode 100644 index 0000000000000000000000000000000000000000..948775fd72fea03d34e103d116971fcd4ba06da1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/f0d9b6c4aadbf6ff1139.json @@ -0,0 +1,57 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/yujiepan/phi-4-tiny-random/e3ad5f9fbc90bf9db678.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/yujiepan/phi-4-tiny-random/e3ad5f9fbc90bf9db678.json new file mode 100644 index 0000000000000000000000000000000000000000..be43f0060c6087e3965686c2855c8356143064ef --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/yujiepan/phi-4-tiny-random/e3ad5f9fbc90bf9db678.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/e6a38b2691d3af1df8f6.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/e6a38b2691d3af1df8f6.json new file mode 100644 index 0000000000000000000000000000000000000000..0b3df3c8d550d459d762ccfc01a6e4c194e203d1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/e6a38b2691d3af1df8f6.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/3cbc1012faf986f589ff.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/3cbc1012faf986f589ff.json new file mode 100644 index 0000000000000000000000000000000000000000..905d6ed98136b9245801e989d30ccfefdc3901ff --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/3cbc1012faf986f589ff.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2ad69cd01383a682715fed6c7faaf92a72533ca6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d33c54b692225f4bcc1c024a373bea9e7e2ea73e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7fe88910e2d8bdfc4ed07d60a785432c2132adf855d253730ee5cab99fc987e +size 84807 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..07d174bb048c652861b13ef56cefa3cb44dd0d09 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb6be3810e1087644eeb0f9826f50d7ede89fd89aff4b8297f3b959d6028818a +size 646144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3b28dfdd4b213537b2dea5c6bcc967e9c9762156 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d61ac2165fde7772e8d3035b0f89b50beec5e0d305548cbacc624eefa9452d3 +size 82753 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f5c22479cc9f64a07f5e8cdf30c3bac9c82cf686 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70d9d2a69d8137f1bcd2b995a79484cb724ac968433fe19ab7907c62feb356fd +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bd5da8643aa9cd24dd2fc92a64ed9a52364d3fd3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1409b4b64ee616125ab959728c44a329793a9b98726044742b59807b59cc637f +size 285854 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be8215e4db02a98e7c7d9a9b026367f5b42a91ea --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20ed0858162f8fc2c58bc4c23d0925dbeda5897dae0889f87758206676394f4e +size 81843 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..eb1875b383bb1e081bc75dd345ff4b8d012ab0df --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f926365798c747440312dbc7ff4146ba80e22ac98ddfec57603085c193e3443e +size 246784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..75faf530d22be18c98c117014742201d25e57404 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71fc97a4abb0b279dd9276c43ed0913d1575127ba7945a92beded26c5108a485 +size 255104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..64028cc78541c1833466a343e251d2e19d025203 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e70386c0d4c7acaf312dfe4908f119fe46d817ab3d82394d549c5ed84707d551 +size 80382 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4d84c3bdf240c037cf5b39e95b42654323f7652c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fda0704981a98eed90e4bd1f26629d080cf6ab923eaa7d620bcf725ff4e9176 +size 216064 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..21aabc06f820685c3094536d505bb7087c422e2d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d723bb93434b21a2269879549dd3266e453eb8cad8956b70d634bc7d6ab5de +size 224412 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..a7a40c022faf4beb0fb20246d9c1f9c8f32c6900 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..727fe2caac58854e2f659732c11b6b5473ac4505 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6596d3769509c9146ae4a3975d1c70c2eb236f1527bf8cab5cf9306c4300289f +size 89555 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8faba5f2cc3e4cd1297aed03850898c53f50572c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56d3fb22c40b3a4a317abb39e20413ca90c505881245df5b6fe3e95b5bbbfc38 +size 369664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..132ba91f0752a78c51a15ed98b5a36a0eaf09ab1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559331183c406f8d099d1ac344524b432620885ba9b04e979be27c169183a7dd +size 379362 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..71a45b2e71496cd9a4651b67356cd1c41ba41ef9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7ebbb9e2aa4d3c3d4667f098e18faa9ed0a231c9831e73c8118bfad21264ad +size 91147 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3590747391a07edbd5048af206f752f5f4f8015b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b3ca19d11f248080b12e56e27cdbb64b21d4bdf9135549a59a69fdbaa8aaef3 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d3e13d0992396c9c7cfaf02142870339235d6b7e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5e65c2494ac905de7039+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e10c2729ee94b3b5b69d792b97ea13e8f0360d62555b5f488e5f08b68d68dd +size 289031 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..49d00c61598cd7f8afc639e583dd59a7622b1250 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c4ddfc86989747c08ca3a525de9f795475afa56281ca391e150429a79ef6f36 +size 694128 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f128b76e1f3b6bc3d1fa9d959f6c02b16dcab464 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:414d790b9a8e15b872694ef9910b367d3aac6b9518c112451a3794b3ae9d868d +size 625664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a1896eb5bf31aa5ed19ba175b4dd00783d039d55 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d354e998f756d31fe63805e67de607b859b8017e6fa64ecab92379c9b30786 +size 83504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d8a9c545197aadfb9bd6525eb0012532cfbff097 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e174a0ad6384712cfa5+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6da9a01bb2393906850811174acc7c5da9a7725c8f3aa6afada1e4ab908beb6 +size 328704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2ded076d439256223c0df9ab1dd39529cbd60ee0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4745beb85091594825aba95d8f89619176fb4e192e0ebd421787ebb9394fa90c +size 90382 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..92c50d65dcb7bbb578beecbe03deefb130274c56 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a485be79a721c7b20385+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc7355dc8d2d15baa38550f4d69a31b739a8a85159336074a32c72309404dc6 +size 359424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3d7d85cd3d975162683053faa369dcc6259604af --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2117dab3e3ed8caad749b8256ab430c591d3b72a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec57d9d2f9d9be197ad88a58ece6b14b0ccab97c121b8d20cc9512558ddf6562 +size 70276 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..be22396259e7e2451ba8dd247ea4d7da00ef79d2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5772f9cd67d35f56f991f6d7a6d27582cc32578c305e8ddd12a7fa3559b77f29 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..365a3d68695b6f353bf11dc0bc974ed649466a1c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b559f9b8396895d936b0+80d05c3f/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44814eeb5d6f3c4ee2638b7c2eef8eea9101cf88679710e73bb50c119aa9c104 +size 289571 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..58fbf6b9366d9e28d7f19321e85acd4cd96c0614 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..780e473009bedf53cdde08908b2059754fe87b34 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:927cb41209213462748741781fdbfd5ea0d5d100cbd62ec70094694e96145b34 +size 97794 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0cdd6fc27d3fe411bc111e3a06f60bdd3a983137 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c58e0a47eb485c62b0cd+283df001/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:622a34dc2081999f2beefb322905c2d688442a9d4332279f43b74720f97cf336 +size 410624 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f9aac85f572373187ed5932ede47f59a7a91323f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0902b9467e36affd529236be99c466bb326eb59e56e6b6414b2bde17d7913ac8 +size 676474 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2c8981fdea7eb4f062db0ba3b0c3e13fd9153282 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5782660a05efe31ac88b79a28ee46b7639216e804f8a0261a9bebf5ecafd780 +size 543744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..58ffdeb04eaa8f0385c20a6a712c264694de574c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cedace9d3d693aab8d35+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc759135fee4eca919c14b4301fc923b5a6d3269d1c84a4b486578cc3d6ed708 +size 563380 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a1b06eb2a8ec93f92eff30987ebaa97a88650df6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d345b4787840339dafc6759d57787886ac8152c17d09d39a9df9d836bc69bbe +size 82772 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..626c31b97ad4c84550a4cdbfc7be8d450e972652 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d8cfc8fee2dcbc7833f8+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8a567c17534fc2cec84c7f1013aabcd909872b5e6ff87df4425a83976ef1292 +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0a454d7107211e73e42c127f1a086bf729c69e79 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c79f98b7f02fb3fc41d499b431d0af72d9f4b2cce450d8342a062a580b1751a6 +size 81516 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1347c0076c91dbdc3308e60bdb13324b92659332 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e12a32198da9ddff7d98+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e11fce6f3cbb1af7eaefb83d30d962a5598c6a78ed782c810d6a55b4ba29ac9 +size 267264