diff --git a/.gitattributes b/.gitattributes index e598aa6fe1966f0e8eeea41bb67b4669a4cbf9b4..599c39a499cd43b281d124a1a65bc0af0f220075 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5857,3 +5857,20 @@ neuronxcc-2.21.33363.0+82129205/MODULE_d407bccc563987df7700+a02c3a36/wrapped_nef neuronxcc-2.21.33363.0+82129205/MODULE_30826b478ad3db0c312e+24129607/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_b6a329fd6d4912085786+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_b6a329fd6d4912085786+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/ibm-granite/granite-3.1-2b-instruct/e3a25f6080ce21ddbea5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/ibm-granite/granite-3.1-2b-instruct/e3a25f6080ce21ddbea5.json new file mode 100644 index 0000000000000000000000000000000000000000..438936c6a1cff244c5eb54294f225f2abd03b78a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/ibm-granite/granite-3.1-2b-instruct/e3a25f6080ce21ddbea5.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/4de824d09680ffc30dd3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/4de824d09680ffc30dd3.json new file mode 100644 index 0000000000000000000000000000000000000000..100ad08663244d9ffbc64d79106226b15105f03f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/4de824d09680ffc30dd3.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/a5b7f4c133e2b95a4b18.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/a5b7f4c133e2b95a4b18.json new file mode 100644 index 0000000000000000000000000000000000000000..ff1575985d486983ffba15f8bc78edfa03a207da --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/meta-llama/Meta-Llama-3.1-8B-Instruct/a5b7f4c133e2b95a4b18.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/9468666db72cf8e90bee.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/9468666db72cf8e90bee.json new file mode 100644 index 0000000000000000000000000000000000000000..c70548f5902b0e2a61579cb02a6b98b27cbc0c6a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/unsloth/Llama-3.2-1B-Instruct/9468666db72cf8e90bee.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/c76fd6924e0386c670aa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/c76fd6924e0386c670aa.json new file mode 100644 index 0000000000000000000000000000000000000000..17b39ede2176c97fc0f3e4e7112d8e461e5f3f2b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/c76fd6924e0386c670aa.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "f39ac1d28e925b323eae81227eaba4464caced4e", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/c8931c218bbf1b28c5bd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/c8931c218bbf1b28c5bd.json new file mode 100644 index 0000000000000000000000000000000000000000..1c8efb41ed2b79384e8db4c81d3a5cee041d4a69 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/c8931c218bbf1b28c5bd.json @@ -0,0 +1,135 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/6b58992358ceb302871f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/6b58992358ceb302871f.json new file mode 100644 index 0000000000000000000000000000000000000000..e88b40f4f74421c8a191e4e5c385aec17465e263 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/6b58992358ceb302871f.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e31d621e27e1c8828c1107f01b5f121defaaf5ae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e345881b258c1681ba0f32fff9a68af5ef601037728aa658e43902ca1367f6b5 +size 469824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cfe0100adc4d879dd304373eeb9e053fc1055008 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_07b31a4647d204c9e0b6+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc11433bb9fc0decfbaac5eba6dd51d6fd0d0a08302227d0c5773cb63650840b +size 41544704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..731918e11ab8732d90ccdebb121b8fcffffc406d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:118a21091459d68222b43c52eabc45a5b2c9b6d5ce360bd811f3fff671ebfa98 +size 928451 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7067c90122fdb76f582303bc421d5a8f2839ed45 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_113098831bb4bba15e6c+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b861242d32c493c2b4c4a513db45b78ac2b632fc86b9435c977fa99a4a9c748 +size 41851904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..52d50c2e0cb4767b2bb17c28065da7adec730a63 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b52bb25f4cffe3256b978fbd1d28251b62ad5e5e6567f8a72c988c0773db6e +size 1115300 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c5fe23720cb25241ff0e17d7a01000b4fdaaf9a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36abdd1924a09a40c8881de6092aa997d0c3383d5aa3c4961ff3306b1e0a8eae +size 6749184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..de7fdb53a7e7191a7b177ad53059520205626f84 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23c252d2e1c627086de5+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8d20fc6e85238b52c8d9c7ddec28f505a1b493e3fb7589dc65e4741195f0a5 +size 6933139 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cb08106d22b635f56fdf2c866e4eba2d40ed9980 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b0027f8dc491ccb3d8ffb5cb06139ea88d0c9af07c9b27ca39e731c95fcf5c +size 799960 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5b8098912d66de7783febe8cdbc0d63bbe14c492 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b59e5a2672afe6c4fa02c26ee4ff99bf4d2dbb3c302fd8e1905210547b47ec +size 6872064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..120fb3d8d9ecfb76d1951c6fc3409b48be56f3f9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_385cb14990d64a072106+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9a919963780497d2a08987d60c2f320cea821423e1bd7f64daf07e8ecfe8090 +size 7019528 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..54b6a80a50913ca448f778edf4fc772109e2e0aa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a38afe6d7755b3854989b78c8e6144ea5f0496c35eb945a2f554f05305383bb +size 387883 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8bc553627ae49a19c5363ba729e22baad62cfc17 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70030484220f658f48d6402e159c5eb4690dd4262a3d1907296d0eb6ee63547b +size 2939904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f3214cd14207a0d22d8ea172d17a19e5fdbe6a92 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3e0552e84c0922533e51+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4701a18744940774dd6529471e1e0cbdbbec2071eaf9d636590ff4c25c029c6f +size 3013890 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..62471d4d202aba471cf87a22e290bbc05c029468 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48376071a4903c04e14ec5d1733b426d324003c9392d46b6793b79af6d98b028 +size 884130 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..43423f815be8579b669b141ae8c68085579e1172 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_46af0f795cf30d05b36e+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa72846d1bff0674ceee812f7a8f743061a1bd918e4d219651b1046c1747662 +size 5192704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..26742f0504fb650ba849e4d1494a9f2ed3ecbb5f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae0f9a9a81f29f76c3d97f16e4f250edae4aee263216b144fd949b8080264604 +size 1084587 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..623db56430d12726476729c64a1c7c14798f98ed --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6760fbbd001e66ff21db+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23b91765afe49c8e6462f678b6acc3be2400a38357af326604210328a5a20293 +size 5807104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7aa0176eedca1a0b2e1d6d090bd2d15a2ffd0d42 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31eef2504bacbbf5fc8fccd8f376ed87b5a920bb2e37c094014cdf4de683584 +size 756502 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..48fa4156b2cdb4a2c943811564696e07dcd5f16d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deaed02fe7a9e3e4a16f6e0ed9aa748a68719553482950cbc8b1ef45c69d58a4 +size 6902784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..78aa7e337d496774a9f59fb64037a5c697357ee5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_71106d0443c3197ff0a7+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e72c97c18038e1d63bacb80b3eaa909dbabaf6a3c75d28007980b69c275cb268 +size 7050133 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f2beab95ec15a260647ffd2633adf8807ac0262e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b08660f19cf8f59f4a1901ebf6f82e2b9e7681805ab114cc76ca4fe48fb37746 +size 832979 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..61edc504d22dcb7f82ab5e24e5a2665d73841656 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c80b5bbe3687fcbeb43051790f19ac8d29f7f850397ba25f08e0e17b2d2ba8 +size 6605824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4855a8c6716c3313602885269eae21005fdbcd53 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aa1a56237e3ad1974672+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e63fd4d850071fa0c6533f3131bdac8bee508de2f6a1821183f07bb15955e54 +size 6772233 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e1c15a80ce2906a1ffa179e2f0149d7a62867816 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c36c3ae2befa20992b87f7eb3a8b50f9c5560b478477c0301a689a66f3fd53be +size 661369 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..03f42c08dbd867607a7e063e4d27ac7005b8c5ee --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0450789df9d43606b7e9b514575afe9c978dea0fa49bc4a17345e6e39a02512 +size 12401664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..29b43b295cef272ba0061a25f06075df37cfb976 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c3ed26835961cb74a993+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07f500e6454dee5d5fb44419de47072e3cd3e5014f9d3f225515a5f8b8ee1782 +size 12548565 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cdec98f00cf0d75d3373a6ef10571a609ab754a7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cf3533da5767894877c9d74d4264efc97a93ef15f7254359399d3e56600101a +size 1021532 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8cd42d8ca18b45f172038ccceed587f2e8a39f31 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e31397d074de214d8505+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2e04324b65c1b9bdfe725ea67091ac79e160a504b10ea43d3af075e8b8e2437 +size 9360384