diff --git a/.gitattributes b/.gitattributes index d389c6f4b357427f1141434e3fab211b58269546..467d7a2bc0ec72560aead8852228d3a9dcbc7bdf 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4916,3 +4916,73 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_7f33172c8c352e3cbb45+cd3419b6/model.neff f neuronxcc-2.19.8089.0+8ab9f450/MODULE_7f33172c8c352e3cbb45+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e7927a94b94f192b50e+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_ac66a4f380d8429e6548+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/eef1cde6081ecab1fe84.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/eef1cde6081ecab1fe84.json new file mode 100644 index 0000000000000000000000000000000000000000..aebc22312cabc997311f76606cc71e85e7ff2b96 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/eef1cde6081ecab1fe84.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/granite/ibm-granite/granite-3.1-2b-instruct/c0ec6017150d374284d8.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/granite/ibm-granite/granite-3.1-2b-instruct/c0ec6017150d374284d8.json new file mode 100644 index 0000000000000000000000000000000000000000..21d529725819988045d6abb398f9de347fa7780a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/granite/ibm-granite/granite-3.1-2b-instruct/c0ec6017150d374284d8.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/01fde036f298da576bb5.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/01fde036f298da576bb5.json new file mode 100644 index 0000000000000000000000000000000000000000..01fd8fa140cc66a784b9b3fa27346593969aff3d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/01fde036f298da576bb5.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 1 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/557109576f3f8cbb33ff.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/557109576f3f8cbb33ff.json new file mode 100644 index 0000000000000000000000000000000000000000..baec2b7568df53d6aebb6559a11e5954e7008974 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/557109576f3f8cbb33ff.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/71332c436959c6f2b2df.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/71332c436959c6f2b2df.json new file mode 100644 index 0000000000000000000000000000000000000000..faa5c029a14eadd4faaa2ddfb5a9d4a5215861a9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/71332c436959c6f2b2df.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/d6f52cf989836d27b4a7.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/d6f52cf989836d27b4a7.json new file mode 100644 index 0000000000000000000000000000000000000000..033fb7c75c3678057a7806baf2b8f69e309069f4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/d6f52cf989836d27b4a7.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/d70d537ae88aaeeb0e4a.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/d70d537ae88aaeeb0e4a.json new file mode 100644 index 0000000000000000000000000000000000000000..f6597f2f3c6437f2743c95c7c7b2253b215d2fe0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/d70d537ae88aaeeb0e4a.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/55c426279b3ea27aa80c.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/55c426279b3ea27aa80c.json new file mode 100644 index 0000000000000000000000000000000000000000..e5b8666b4e91c8eaf9f652feac4a660e15f6ef0f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/55c426279b3ea27aa80c.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/f5762f0df58a70947038.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/f5762f0df58a70947038.json new file mode 100644 index 0000000000000000000000000000000000000000..a09f3e7089cd0bec1affe2f39fed542270587d04 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/f5762f0df58a70947038.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama4_text/tiny-random/llama-4/e7acb642341522b87231.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama4_text/tiny-random/llama-4/e7acb642341522b87231.json new file mode 100644 index 0000000000000000000000000000000000000000..0bee410415fc6b769eb87ccf19bedcfe62f62fd3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/llama4_text/tiny-random/llama-4/e7acb642341522b87231.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/mixtral/dacorvo/Mixtral-tiny/6b5316af77d112e25d37.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/mixtral/dacorvo/Mixtral-tiny/6b5316af77d112e25d37.json new file mode 100644 index 0000000000000000000000000000000000000000..7be99e028937620386cb481286a0ada393415efc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/mixtral/dacorvo/Mixtral-tiny/6b5316af77d112e25d37.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/phi3/microsoft/Phi-3-mini-4k-instruct/cf7a57eb411c245d0587.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/phi3/microsoft/Phi-3-mini-4k-instruct/cf7a57eb411c245d0587.json new file mode 100644 index 0000000000000000000000000000000000000000..3ff7e122b66becddacee99abe9c0a9a386f46f0d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/phi3/microsoft/Phi-3-mini-4k-instruct/cf7a57eb411c245d0587.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/phi3/yujiepan/phi-4-tiny-random/9eeddc59a59f99b6c3e0.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/phi3/yujiepan/phi-4-tiny-random/9eeddc59a59f99b6c3e0.json new file mode 100644 index 0000000000000000000000000000000000000000..5617f22737df00f58b836612adb5f090f8eb2684 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/phi3/yujiepan/phi-4-tiny-random/9eeddc59a59f99b6c3e0.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/310cb4f0cd3300451fd2.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/310cb4f0cd3300451fd2.json new file mode 100644 index 0000000000000000000000000000000000000000..5810c58464ed0e8f0eabbea3f992f9abfb6afe01 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/310cb4f0cd3300451fd2.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/6755c94eb3261b02a145.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/6755c94eb3261b02a145.json new file mode 100644 index 0000000000000000000000000000000000000000..b676720e5a5ac0218516dfc4edddb9bf1c1b571c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/6755c94eb3261b02a145.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/a4731d4fb6d2288dbc78.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/a4731d4fb6d2288dbc78.json new file mode 100644 index 0000000000000000000000000000000000000000..1e6196e536a7f7c82f6f84132f3f08486f5840ff --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/a4731d4fb6d2288dbc78.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/bd43b5032044547f61a8.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/bd43b5032044547f61a8.json new file mode 100644 index 0000000000000000000000000000000000000000..516253d107899606b72510e6b88c85983b8c85cb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/bd43b5032044547f61a8.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/0814e1811a82e83c84c6.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/0814e1811a82e83c84c6.json new file mode 100644 index 0000000000000000000000000000000000000000..368932805b20bf34a08d7cb8bcac0e54d94f89d5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/0814e1811a82e83c84c6.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen3/Qwen/Qwen3-0.6B/f0f402c449a4aad60bf5.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen3/Qwen/Qwen3-0.6B/f0f402c449a4aad60bf5.json new file mode 100644 index 0000000000000000000000000000000000000000..3052712a6dfdb3b56ac3b4fddfd3550c514760d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen3/Qwen/Qwen3-0.6B/f0f402c449a4aad60bf5.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/5fd285c0eadaa03f2963.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/5fd285c0eadaa03f2963.json new file mode 100644 index 0000000000000000000000000000000000000000..a082b583000f113cf2788a085c6d4959f7c0f7ac --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/5fd285c0eadaa03f2963.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/smollm3/HuggingFaceTB/SmolLM3-3B/b0160303296159aa9f20.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/smollm3/HuggingFaceTB/SmolLM3-3B/b0160303296159aa9f20.json new file mode 100644 index 0000000000000000000000000000000000000000..7dc99516bfc0a126b0c999a1ad583393a8e33ecd --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.3.1.dev6/smollm3/HuggingFaceTB/SmolLM3-3B/b0160303296159aa9f20.json @@ -0,0 +1,135 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ed5bb81878bb252cba8c4c1252afce54e1ec7e24 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4dc03e16062b1663ff5165bc8a89267d6fd336b6c7e071e7407b74fb98c700c +size 86123 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..396c8e992cbe03710a1ecf4b98286706fe5f1166 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bcd8486a5476c06ef6c+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5924e3b77731a29d1fcfd621b509f821bee84c4da050992d6c8ae0f2fe62a96d +size 257024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9fade9057b99c567a7c6c09a73d93076005e2cc1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edccb15dc5b87a2f7b2919ed7a4c0809618043bfc59d9ec4de17bcfef6704b77 +size 1050679 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e90895efde2ca6b5cb3889b5a57f97e96435fc06 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1de2e21ffe7389e8ad1d+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf12f7c40e6c65d05fc53c916e80d718ec9f9e94a6acb208794b477005d92b0 +size 5827584 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..71a45b2e71496cd9a4651b67356cd1c41ba41ef9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7ebbb9e2aa4d3c3d4667f098e18faa9ed0a231c9831e73c8118bfad21264ad +size 91147 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4e58ec08ee015d325ee65861459665dde1b55107 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e070ed42641ef2f47dbe21548d4d5eb645012bde0712d5ca5857e88eb8355a8b +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..763140004d73fad1071b9da84a8e146564d5661c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1f955163eec0c0a80988+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afda52dbba53bfca29b2dad8dc3c2d460c63ce8afc651b95c4eda744b31762c3 +size 289031 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0147127b77a373d54a3ad00aea9e5709f55fd952 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede5d93d6faa7bc9bce74154a2e7e644e0bd8c72ec9eaea89c14e2d7d881e34f +size 76277 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a3254c6b26a04f5bd4561b6604694f88fb72c3ed --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2a1b0305ef3fbebc14b8+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4795bed827d6a67244f8f3e8824f79894cae978c52c4685f2a4e220c7066ccf +size 369664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb93f7533b6e6c111e93f59e09dda5b37395717a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a27bbdde94b8063477abcd34c10b48a8b37474f680d54e0d0f7ce3bdbd6b3df +size 588406 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..136d6cf79d7df4f794feb7751d00a7d44ece575d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0e0aa6a0d4f579514559ab4ae65bc0eb7c4e7addaee09a3ee4c498f777c4fb +size 1926144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..51ce9276fffeb0622f2824c4ddd7b5ccef0a7c4f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c56cd496311b411d739+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09904e6b9b115a5a3422fe2d8b2c4f90dbace2b6e287a4e56e4e6e264498b9c4 +size 2082478 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0515175f6d499ba6958ab79e4fe33d28581f018b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bd1d095ea14d42a5c32aa750b1d62842289724358ac1f6774badb09c866f698 +size 80627 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fdfce74a88feceec08aaa84fd7b5d8ea2b2de2bc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2d55f694b62e8f854eb9+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b89dcfa0f08e83d65bd822a92b731d3ee84ecba1eb660d5af3b8209f7cd49e7 +size 594944 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f9aac85f572373187ed5932ede47f59a7a91323f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0902b9467e36affd529236be99c466bb326eb59e56e6b6414b2bde17d7913ac8 +size 676474 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6a8e8d16fba24a08ffbb3a43e9eff58dae662946 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a578825ef8646927536b47cdb3fa276198a366020af795418bedda1f4afcc8a +size 543744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e7653df65b7dcafcbdd846ec8d8a1ea1dc917d90 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e7323d127244060130b+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c652f8b81492de9a2083a79be8f432f65d183bc5b87b8b37778641cda530b2 +size 563380 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8c60dc28345e27352f916d96bd05982b1eeb007f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11caa180091be1fb525379540450f3023ef60e5aa94c04160983e4a8edb13e5f +size 626760 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c50f65521f31366487c33dba12c4f7a5af79bfb3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aff487d040d355d7cb2fa50bcdfedf0e81b103e6c11fbfabb7152a9f793182d +size 1537024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..60a7057b908d03e4f47768db6fb173f7e529e260 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2e8a7fa1e77c0d74a109+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ecc6fded2bfaf6aa3e9b17da30708f2a6321e77568604292d6cc67a892c6bf1 +size 1693473 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8731f0723be1ac135a68fba5aab8f26a18ac3e58 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bf05c4e47905baf991a12992f3685b4a9f4a7d3e56f6c9cb2c9fbab3dfe6383 +size 923430 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..75bb23d6528660b4f7b4e868001706b915ebd6e8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32db0b2bdbf17f82684a+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad503e8856a54dd1a45a140f648a03a40603287868ff989caf8ad33b2dc28597 +size 5213184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9c2f20334ca8f742a94af772927706538373d264 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_6dc7d45f-06ca-4f54-a09c-141734127236/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dbe6d76985caf7ca8fa5e443d13f064cea35d1d5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1602e881f5c6dd5465eb9bd35de42e5c7d0b86ae88ea4da4a599d4875c8036b3 +size 103424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d586cfc91113d6d85c08f1bc6ea9886650258c41 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3977195999febfa9884a+4e85d101/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b2ad179b8579796ad87c85c6c267dc37489b1ebad64cfa4114e9dc2f5c9d29 +size 104320 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2bfbfe8c31b0822c8130a68265a6351022d934a0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da1ac5a26eea88f71a1991cf9d63d79f038da1cfc529e32aa6568c793b94f0e0 +size 1061093 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c1db4aef199372f83361f74456e8b38ed1649258 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4910ad1451750f3331b2+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39890ea316f4526f98d6b7b33488020167661f77e8020098a5d903889384297 +size 9473024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c5961069c2c7295f9dd0c09c8926bbc9b13d4997 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5087d0418049fbd5b4475f55a760dababad1aa115486dc375bb55b04a0d634a5 +size 83369 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6b829fd301d03a6d89c468c28af20c021f829b0a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:321c07ba7ca084998a820c1d966da0ef26d54fd183c72bb89cfab8583f7f2d01 +size 287744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5428ef664e8361f9a8510dbae16cc08f0bc416eb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_517620d83434e31618d7+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b0e4a9aedcf450a60aae0c494acfb0108823ae8987f068181861630ade1c7d5 +size 296094 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eccdf4537d43b1b6dce6f776b7d0319ea18bbcce --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f7640624dd097c4c870d4c6afc656367e9d481e5ad6f28177c5fe467b616c2 +size 579648 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4ec91e97730a2c97a0cb1115d91b89e27258daa5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5ac1aeb80812cff4b4d76915d55a35b580aac87a4740690b2fb1f9b52653c5b +size 1844224 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..aa48f18fd92eeadfd6f8132ce779e2400853fd88 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_573529673e588860faa7+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32764f731a49499257f1496df41b0e37edffffb58040eef958eeb3b5f065776a +size 2000673 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..71b70ed36aea43184091bc965cb2fc9bb31f0e4e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_6a2bb62d-2110-45e3-8e17-e246ab82ccf4/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8496781848c02ecd308b5b2c5a3c342b702425e7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044 +size 8979 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d5cb020e5c6f40ed475d4698135c2cd966420ff0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab117efc58832adfb3e5386dd4c81aae270faf31ff91bf5e40aa6219575ec184 +size 246784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f66fc8526d90c876fee6f9462e60457b9e4b52c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5927db22f81fdb595f6b+e5475efe/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc84031f79236443cd31a1f0605c43cbf701a1bc680ae7712a4bbaac5fe0f7e +size 249608 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3d32dbbe4f16a74e4088b600953f14b223a09df6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c858d4d1aec93bd7afa50ec1843aed10ebfe548e7ddc35e26e16a776de21431e +size 739366 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..83b7d1ba8880d7fcc412ea193af3c1a5b1486572 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5a06a2699238b840e153+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92041d5665e72934df51baddd5650aac6bdb05c62ab77f75ac4a38b1ac2ab403 +size 26133504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb166de8f3470f7b8863eeb949a188c9b3927435 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8993ddc1eb16bb97008efb7e4cdbfc4b0be347d84ad3ab779101af69b10013c9 +size 76332 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..586eca7d92232acd486c3398cb2841802a91b2dc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa55780f6d4cc4ddbccc7c80395e5cb0fc80706206e2c6b2e9020accb2423f9 +size 338944 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9bb3ed6d1c417fdb468d9196c539f6a7ff3b3b2d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5ea3e1d7b6cf5f9431b6+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c68785423fd05a145c56f71caf568584b803acfcc021eb0f3c095ac05f9971ea +size 347294 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7c28c36e23a61e75b8fe7b240f11da8c4476a486 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833b82cfd5de929b110eae7f2597028fd2aba27feb903dc4a6389317ec3d4c73 +size 85236 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d3b91f6983e9a8cad9872ca2ab263df88f5831b5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770fd1206a5629984460e7429939d19ad42d3ae6bf81b225a80642effe8b18d3 +size 216064 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a26f76adffc18a2efb634a126135b25c41c972fb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_61dbdf738ca076ef369a+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c0052f1d39a3951738cecaafefde0bb52a3f92ea636adc35066d09160a8feea +size 224412 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dc80e75232e7b415325d1562f9518f26670d13ec --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f25ade0a3f52e4ef283ba2768a1420e559bb8c5c6ab4b261c432d72a52fc9e6d +size 863510 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2a3d9787b078b61d425f626c25f46269f65a629b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c484d4cc0ebbeed7504aee7eed6bf84f2f3dd968589ce082fa2a1cb7bd387a3 +size 6769664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..db4ba48b877a24137e0b61d8d7dcc0397fde0a77 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b4f6603f5b4d3a7bdf3+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b5e1dcbdfd4411f96ad27229b8e88ceeef882503d1710c845c36c4238e8648 +size 6936331 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e984f1bfcc67f74c845bbd03e125247152b9b6bc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:828b718ac0d544f14a9a683c76b442c49520573027ba2e8347fdd5e79531704b +size 702870 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3f77949d0c53dd7d1270252f643fe6e57161654d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a24c747a5a3dbaddaea3e4994d94280b967b5ef3acb32721fa58583d3b03a97f +size 3503104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..53d3cee3947a35d7c9ccf71ce9e1424a26015073 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_70994cdd91dcdd578ba2+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e8172c712bd8de61bed43f49dae8fff625acfc1b2dff3cf4d69f88ebf311178 +size 3640300 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ccfd8df1567112b5d3b3ae737d3bc010acf601b0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac09fee9beef4356d5fa3b7db4c5cc47c086e3b3d0c0d3bd4b31eb45baf9c88a +size 82753 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..76c8b86ca607aca8d4d93e0973f8ad7e618e3e84 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad3ddfe864d117477df262458a6b6f23d3ff1d5c8ea630e42a3cbd8d49b7c35 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ad330e3526c5ed537f2ab3e6b181ebd8f60dc77a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_76e4434341e561172edf+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ef1ffe202a3c91ea420f79083ad09e28762939bc0a9c2a223862a4c66241e8a +size 285854 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a1771b3277aba6060610f046a8d70cc1d3c749f8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25adc0d745ef4f8d9942c335b0ca4adf03e64d76eeb4b1bffb8f65e2c0167988 +size 865460 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..83e0e5edac93fd3cebe0ac08926acd24f660f91f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093756b582e019dce23+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6d731f1002023542659aaf1cc1d9c7a79f802ef72cc2ac723dfbdfab797f432 +size 36148224 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4db248f0709adb114792f622e0f8fb7a6fc29da2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:151194d0fb53ed69ccb9ed2e6e778ca3c4e378f7dc502bd714607def47154e99 +size 694128 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5d8cf6433396ae5a9a0a4ebee47a8c8dd703fe1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_83cf5c47093549d97b56+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10ea3d05a936f21d4c9a03a7754b654b6cf95be775bf68ce1d227f694fcd9ee8 +size 625664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c75c75c5210e7e6f88b8af84ee26941bc8403c0d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfccdb8f604091c97ebfb19bbc0cb1f2e06177d5ac13ca58ccbc04bb54156ca1 +size 509252 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..497616daa1f31699815a1c1a8ec5e5d8812b106a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8c885fa7f628cf2adacd+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003f0fa426c5816308090501161f02290f5b3b8db1fea71bc3213d49b668c648 +size 41585664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4d03558d5b2155b0ae02f5d81c4139da10ec7f2c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c172b853d2ff9c45773838b48c9bcc55ea2a6fe543878be9924d7503c7d461a8 +size 392910 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0e3b3a760dab1c158a58cbf9c405e34b29ae34bb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8f87e4680261d822e2a6+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51eeb747e7fffae7a2b062586645806d141f69c6a6f3df0a2e24e30b1e49c5f0 +size 41626624 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f40b0ee45aca1c7e21a0aa6a6a8e59e2571d9f35 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1702ec62daf7053b2dbd068c167f1422d1e605d7623be441ed8ed851d95cf57f +size 70278 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a2dfea93a12686534b8dfbee4f071d2669a37344 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b100a7ac40df374b83f0a6cb2473ee1ded71cecea8eb3a7a2538e2d47eb1e1d8 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8563aaee79930afa949b01865548620e7c9f5ecd --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9413970e765acfb7b93b+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:877022416bcdaab2e25ce13b0e9f9274914d12ecd836d4524cc6520426bb8c1e +size 289571 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..679560a15c431200e743a6f8f26ee4a4f3d0a11e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:016d7fdc55c153357483ec6d31a073a08dafbac2cde7a1419b21f52b984b7d18 +size 726714 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..69e614a07c8d153c0e13fa86eaa4adc89424835d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_94bb1db91a1d0f3e25dd+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faa00bbbcd96e29fedef3b946feccb9d5f898acab1f34375a2f48b8afc5c3ac2 +size 25897984 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2410e5e8a6c766454f698fe1c8eec680d4cd3875 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db430185e02dca8eb3db010e56ec6c7fa2161558695772818b3590c8af795d8 +size 87854 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6157b7757aad7d462a8c12e088bb445512a2128a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9739f6cb6ad655504afd+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72e93c28c5b38226bc63f4b445bd30fcc72c79523a050e5b9d77660da0b517f +size 400384 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..a4b86fb79c4316c7c0b82b5775cec64f438e7c06 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_97f4ec6d-f772-4c9d-9c93-5b5e0b2bca9d/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a11cc062167102b0f4faa9fdf16388d72d9e0015 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4590b82eb167fd46963abdbb717cc18cb9511961a3393d100eeacdbdc45b0243 +size 11280 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..49cebff225f2faf1e3f276f5d19f46d67e47de4f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:869538c4797720d8b953a84a235337cc5f8816898114e9542f84811df632038b +size 1444864 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3222d56a7fde491b6428bb266cf18de6827d2344 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e9600563811116185b7+d378418c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77be121e4387edcfc2cb6753b744479ec289e788f2596c0476b76855e79270e7 +size 1447845 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ae7e0c6790082c43cd145aae8cc7e3cc89e15ca8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/speculation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d720ab66f166d266c2852b1eef13cd4d6f0638ca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98adcbeedb5b17438eefb927f8aebce924ed0b4c178268ce4440981f5ff8b1c0 +size 415213 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..829718b910a52703fa4b566a8daab44d388a906e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a09163e5ab9614aecf2d+677eeb9d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d763aa7c178431edccfb2ccd79badc921ed84ac35bf6e80f1a046d8fe4a97e8f +size 3666944 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2bfb537a43fedd8b8bb8a1e59f1e1a94592cf3ff --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4537b8efd8bcb411dec9bbf45c0b0fe49b5021d4b2f3be99596c57885a48f33f +size 83504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..367562791cf13453f566fce78a38783e8bfabe96 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a1c9a14d12d639c18f60+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe8d49dafdf2e419d7cf3e5c3dbf68ee1871a98f41d01ade509942dd2043eeba +size 287744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bdec493f40832baa10fc555004c701f519155d81 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23fff2222c80e609ede15fd7899202e3dcfec823c20d5693d35391a2f351de45 +size 82753 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5590763c61f0eed9e098b7dd3a99facc7ff2858f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:030072481fd6c7d80b617b0b351e1e6957b78b96a3e29d0d230bd28fcedc30e7 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9847f26fb9da32a1006eb446cae735b46dd46db0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2ecbfaef5906dad2124+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:517a87a2493dfb014bd5f68fcab45118b1f28c460345b025e4b99df8c4007ed4 +size 285854 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..432703ca06587028e081b3337bf8e5c0b1e77a10 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d56077a01ff146a4392ab8f44a679b7a61a93a98b0689f1f004df657cc89d6d9 +size 418223 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2a74ddb15e3304913a21feb16b15329b89bb99ec --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b00f40b875226d0729ae9ff1ca7c28224e0d947573296d3b772ce09b31b6b3e7 +size 3093504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6e455f58a0fca21ad36bfa46173a53ad18acf36c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ae17583ade4e9b69e281+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3a9f4f6031ff19b690333a90ae02a74de0f4a248c6b807949e260be6ff3e3c5 +size 3167748 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5962ab26461751abbd86711979b0e1d9a6039ea8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d065f8d4d6f1bf1cea666bfce7464a2aca12bf44873244b9157c1caffd720f0d +size 82772 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..014f5f13d4861ffe9f677553a88e630930d71e8f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b37fdf56be00130a14e9+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d9e8112524094d41e817259f3f0fccc05a86667c23170541243a02720108a5 +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dea74dfb3bec8c7968d37c6160ec5bfe0a8db6bc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7ea4024512779624a08b9442447c93f47d17ef5b03b786fe5ea61a5b733834 +size 1072107 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bc1ec80d072d366c0b2a5fb4912d8d02bbd8895b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a2bfecf99e7f625040ad6ebe07e8f4befdeb43ab951141b875e8017c591201 +size 6974464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c5cb93b8332afb32e269ea50172124ae9aec2543 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b769f1c4c2ced84b948f+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0030dd2889b381be5efbb05cb5d1f6fcf9c473a1512e0122d59606b3c782b0c +size 7158800 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5c62298d8a08d196e9705a2dd5985a702ec26668 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b903d94040e120e0b411edbe0e71ac2ed6b07253e5156968451f84d12dd938 +size 380010 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a894abdf4062716314e15d6e17915f0720671273 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f1b213638039018e48e642e09e9c4ebf1b83f42cb1846a56844f52c2b6b6dc +size 1926144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..39b6d06644e8d3f303bfd637431a8737733e5ad4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd494b3349fccb264f56+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18c3d620155e2fb600457a05225179053ce7d0c3ac8ffe07edbe1312510a4153 +size 2019200 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9f6d8a8ec94064b5762e1c93b76ab85cdd87c1a1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bda4418840c0cd6246ecd7ea471950539ea19bd575803e363b328cdb98b9d4d +size 593552 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a318087b0585115c6df1f1af0a616a9f15c2137f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c3d8546bfef5dce4f40d+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f7323a9bedca1e2e92223446092f5f8468315c74d6164c42d0737a47ecbc99 +size 3769344 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..93354bd42abb66b9b6e56b127441174baa403534 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4d6c014bd9d4693fdebd096b99cf0859b4de0b52 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b143f5b373e2d36f40b5fa9101856cab74ba3ed010d63e42b78fc9ea8cafa8f +size 94688 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b179d21951f2a8ebf1712eb12b2f849ccbe0ed3d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65f3473fa2b14f940a23bff32205b58f2aa9173a612162f12dbc3a761eaa0bcb +size 328704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9ca9fa183faa194da984f12ae71318f9b88cd8f8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_cc93bedf1ffea21b58ba+cd3419b6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccc6e31c8d6dee70e7daa0174d82deedb51d0ae9c68e73bb2796b3b817c10351 +size 338402 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a66d2b5e2bfdea834b5c48c5734d111bc783cd0d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ceb5b7f758ce013d84d6b8f019608f3d1c9fb2d520f2bff26682850b5b969a5 +size 691657 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..490df9167af1bd631e3804bf7d02b4dbef274c4b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9795b0cbf74bed9f178e476624f7f7eb9daf8267bec6f72a5c7c5411a02c3d75 +size 12411904 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4472aa08b777bb7fbfbd32b9ef85940ee1dde8cc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d0dd97edadedf7c69272+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f0e36518b0d43c0654010f5aca2c3510c59b5d59e8a3f25a7090edc14ecc564 +size 12559063 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cb53ce53583cfe21339b4bed39f5fa1a679cddd1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eb7178e38929da5292256eeb5b35acd77f8f44d9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b5fc2c042152ca44f6d9d602f0e045c6817a0584e944e7e73ed828d193880b +size 104026 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1b0c3a63417f5df5dcb964ff64fcf29aa578657a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d55f7459a88a4b840dd4+253d6470/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a589d111f6c2cfbea8866546021d7a1aa9b826b573d0d63e21b241076e20f504 +size 369664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1287701e89beb2fd6e86073a2673f2feb614ce12 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2b25b795c82867c213321d7f85e3fea471792863beb8de716943a03e3cd374 +size 75857 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6448badc65160aac22eb7ea31fd2024fd31345d3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b47bcf9e178acb1c32c73fee225cc437ee0757544d14be5981d972f028ffc8 +size 359424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6f64c8588597a5d77d48dd03bb093407120c8920 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d6a139ec8c530de289c0+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3d6927868af82a8fae04325df6259c4c13436e48a99e6ce82603363aea6f5e +size 367774 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be8215e4db02a98e7c7d9a9b026367f5b42a91ea --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20ed0858162f8fc2c58bc4c23d0925dbeda5897dae0889f87758206676394f4e +size 81843 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3c79e4d35452e5e57c56e14401efce13d39652e6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4066dd46e1d54dabdf88f312013ad294c7fc88c77a7fbb231922407f052488aa +size 246784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..03921a6014b02fee01d5a6c3025238519bce5fd5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d9bfb8a9c556b013ab59+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37ced1bab1fd78821049fa662e57d160be710f7662fcc47b5fdf639b3ea68506 +size 255104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9403566972ba49f9188049e9b1d82d18187179b2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa0ba4e2d28a0f71e13ca970f64df79176ee70fd0b970cf30ed05823b1d8a87 +size 83504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f0f442e9d63ac64c5a68af73093adcc22b2b398e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_da2f9ef9477a4d66d1a7+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ebc88d61c444b20f154f4a45dcf148a9d6a8e782d55f93957aa93706df119 +size 328704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0ee3d26347267c7b0ff0a70fd14c8174ad0dd45 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e323d3dbdc3856b221fc463f0cea182e2c70423c291e9beb5c2a93e844508c6 +size 90382 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..02ba71ae6082cf2b66985dfa074eea49c72333c1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e70394de1d791e12c520+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dca15053b670a78cda44e3c2bea4d3c7903ed75006f6e5b9f2e3c4e7b0f2e50b +size 359424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..681f3ef082ada704c81c158ae9d18f97da44f804 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a05d55a3ba9ed1af510844c9797965d4cf4c24eb231b3e0e25021c14e5d55ed +size 588406 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d476100ea79c4f84ba0eba50ac1b09b990345e0b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9aa61287e71bf00adb890d3ad0b7bd453ccbe3f48c9f1bb3833da2d5cbbe4e2 +size 1577984 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d848ca4b2f1482967e1fed97c0397da6131e7c7c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb91d9a0090e7031df61+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1381fd25b587bbfbdcbc56934e73dcf78207a182c1095469eb58ba8e04d7f122 +size 1734318 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8779b159830310394becc88b80f07f61207ff4a3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ce778f564affdc5295812468969394a609071dd4be74955ff14ead8fde40287 +size 686865 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6f674bb8a27c0c25aea7610a03ce50449d0b4138 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f189f67fe2f9674d0690+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6188dcac1b34749ceb00a1307fc62540c8a38183c5633569882218f262106db +size 27997184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0443141a90bcad148a4c12fab2e075503ec33f53 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9328e5ec6c47a08605ba682901125ec76d723364f8fca10f81cb2ea8269cbdc +size 84809 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3c0fb4797cd8de223d47bc946dbffdcb0377746c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f359722ac2fcd615a216+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6e231017b67f7a56c58f65de6b3cf098859bb41fd2dfbda4905f86ae3ab033 +size 656384