diff --git a/.gitattributes b/.gitattributes index 9f2482b3b49c0df1099772f48717f464f5761984..1b835ff2a578ff829e77cb4c0a889d427ffa7c67 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16234,3 +16234,39 @@ neuronxcc-2.21.33363.0+82129205/MODULE_bb7697643c8e00d54022+ac10809c/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_bb7697643c8e00d54022+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_bc3563e123000ea21cfc+f40230a8/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_bc3563e123000ea21cfc+f40230a8/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/qwen3/Qwen/Qwen3-0.6B/4ddf27fb5dfd4909a319.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/qwen3/Qwen/Qwen3-0.6B/4ddf27fb5dfd4909a319.json new file mode 100644 index 0000000000000000000000000000000000000000..fa8f0e005dfcf9e094adfc3f62119bd454eee1ce --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev1/qwen3/Qwen/Qwen3-0.6B/4ddf27fb5dfd4909a319.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.2.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.5.dev1/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/ee71309c308c0bf312e7.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.5.dev1/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/ee71309c308c0bf312e7.json new file mode 100644 index 0000000000000000000000000000000000000000..38f2ce03ceef2b1cce3554cc51afa55fe8940ba1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.5.dev1/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/ee71309c308c0bf312e7.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/ee71309c308c0bf312e7.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/ee71309c308c0bf312e7.json new file mode 100644 index 0000000000000000000000000000000000000000..38f2ce03ceef2b1cce3554cc51afa55fe8940ba1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/ee71309c308c0bf312e7.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.hlo_module.pb index e256ee770dbd1dad86c99d87e0b3150a4831ab47..7accf287229e661a26b7d8f3afac1fad75a321b2 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:770f59a15ab233bea70cb16360c196b23b356f67051422593116053095e3db38 +oid sha256:8c0311f0ae811c00f59cb813f4ee06008d0d18e5bdf0534ecf04e14c6eadcad8 size 739558 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.neff index c91a665bec69f0ba4f66e3255658f1b383dcd120..4f62e49ea3da7276401fe2d303259df31cef7b0a 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.neff +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9d35248a93d4142a3cf1+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58017c0e14cfb2aa2617054665041a111f19a8658e3856ce7f80963501abfa68 +oid sha256:e80290bb0de9fba2d791707189cfe81ec5896082ad2fc95838ce7c687a2f6ed4 size 26133504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ab6d1e215c42b3381bf5d7f8a3b4d6e1ffcc9dda --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccd8dd07c208eb87030d40b6fe53666dbd98b0ad6fa3cf5fa46bd1e5d023563f +size 662166 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..426621532ed9789585d8510db23a24d153c701f2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cd272e40e65fb42ee0c8247f443e277f005acb0ff4230bdc0d2f6c5dfdbe901 +size 27689984 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.neff index 06c7bebc528ec9211d77c08dadc77a20c04873ad..88debf6f7e0465af9986d7337f4955e3f05a8fca 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.neff +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4fb43f4ea0d29f851cd0c0d3575ab21545fe6863d03f53111b381301e3eee665 +oid sha256:c2835fba37931b9c58c33d25543bd60d85d61d996e61ac3b6e78e5b4c95bfb85 size 1926144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/wrapped_neff.hlo index c28017606b905a30c25d51567e0a3028594905dd..f50b3fcdb344198b256e4b4033b32a9e399d3978 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c2b3afa51c57d431a332+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2cfc00602dde4b251f7ee5e382c599874555cf1857694aab39c3a65c89e133d4 +oid sha256:6af3d77381b3fde23da8966bdbf8f343010c5869ecb7fef585166f1178547f33 size 2082478 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f74210c6be7e2141894f34a6aa34cb10bb3d8475 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf35cc5188092a5ed97a2fd36b936a577ef7dc5f8e103e38714748065ecdd328 +size 693532 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ca5601eb401b9978656b7b9703bb561dca8fa5a2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d1e28d83b0b75daadd33+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0bbf7638bc657c8f8d72fdd57dcfbbc3d7ef74e645f02f878fc8178e8b3971f +size 11265024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9e8c105dd66c72ab2c02adfd1c625791960294b6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd18126105abb5dddc19543d22a99b11b3ae776516a3845ea8956ffaadb15a0 +size 702870 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a3ad2362c53425508bcbedfc2496cdcee8fd70f0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503663d1e9e83515a143dbe1a2fa6d4151c04250e2d0a53da66ea3d1a2e75847 +size 2059264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bf100c956f26eceb8d351bc65486961b5fcfff26 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d4fe40d7566b309ac86a+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:848c9cf439fa7e607aff664a598fefc3dee96cfabe42dba3cdfbcb7241202510 +size 2196458 diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/43f30d107729db6b760b2a90833f43ca44f68e6468ef64e131f71e2fa9f5b23f/b76d86cc7c31e0e29c99.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/43f30d107729db6b760b2a90833f43ca44f68e6468ef64e131f71e2fa9f5b23f/b76d86cc7c31e0e29c99.json new file mode 100644 index 0000000000000000000000000000000000000000..656fa57a5a55308b153b42123d50688b1ab471df --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/43f30d107729db6b760b2a90833f43ca44f68e6468ef64e131f71e2fa9f5b23f/b76d86cc7c31e0e29c99.json @@ -0,0 +1,104 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "openai/gpt-oss-20b", + "_task": "text-generation", + "architectures": [ + "GptOssForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "experts_per_token": 4, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2880, + "initial_context_length": 4096, + "initializer_range": 0.02, + "intermediate_size": 2880, + "layer_types": [ + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gpt_oss", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "openai/gpt-oss-20b", + "checkpoint_revision": "6cee5e81ee83917806bbde320786a8fb61efebee", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 64, + "num_experts_per_tok": 4, + "num_hidden_layers": 24, + "num_key_value_heads": 8, + "num_local_experts": 32, + "output_router_logits": false, + "quantization_config": { + "modules_to_not_convert": [ + "model.layers.*.self_attn", + "model.layers.*.mlp.router", + "model.embed_tokens", + "lm_head" + ], + "quant_method": "mxfp4" + }, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "beta_fast": 32.0, + "beta_slow": 1.0, + "factor": 32.0, + "original_max_position_embeddings": 4096, + "rope_type": "yarn", + "truncate": false + }, + "rope_theta": 150000, + "router_aux_loss_coef": 0.9, + "sliding_window": 128, + "swiglu_limit": 7.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 201088 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/43f30d107729db6b760b2a90833f43ca44f68e6468ef64e131f71e2fa9f5b23f/f71907dd62c8b40e471f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/43f30d107729db6b760b2a90833f43ca44f68e6468ef64e131f71e2fa9f5b23f/f71907dd62c8b40e471f.json new file mode 100644 index 0000000000000000000000000000000000000000..d83ed0ebbe4fe5ec5fc56f2f22c918101023dbce --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/43f30d107729db6b760b2a90833f43ca44f68e6468ef64e131f71e2fa9f5b23f/f71907dd62c8b40e471f.json @@ -0,0 +1,104 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "openai/gpt-oss-20b", + "_task": "text-generation", + "architectures": [ + "GptOssForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "experts_per_token": 4, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2880, + "initial_context_length": 4096, + "initializer_range": 0.02, + "intermediate_size": 2880, + "layer_types": [ + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gpt_oss", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "openai/gpt-oss-20b", + "checkpoint_revision": "6cee5e81ee83917806bbde320786a8fb61efebee", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 64, + "num_experts_per_tok": 4, + "num_hidden_layers": 24, + "num_key_value_heads": 8, + "num_local_experts": 32, + "output_router_logits": false, + "quantization_config": { + "modules_to_not_convert": [ + "model.layers.*.self_attn", + "model.layers.*.mlp.router", + "model.embed_tokens", + "lm_head" + ], + "quant_method": "mxfp4" + }, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "beta_fast": 32.0, + "beta_slow": 1.0, + "factor": 32.0, + "original_max_position_embeddings": 4096, + "rope_type": "yarn", + "truncate": false + }, + "rope_theta": 150000, + "router_aux_loss_coef": 0.9, + "sliding_window": 128, + "swiglu_limit": 7.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 201088 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/gpt_oss/openai/gpt-oss-20b/b76d86cc7c31e0e29c99.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/gpt_oss/openai/gpt-oss-20b/b76d86cc7c31e0e29c99.json new file mode 100644 index 0000000000000000000000000000000000000000..656fa57a5a55308b153b42123d50688b1ab471df --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/gpt_oss/openai/gpt-oss-20b/b76d86cc7c31e0e29c99.json @@ -0,0 +1,104 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "openai/gpt-oss-20b", + "_task": "text-generation", + "architectures": [ + "GptOssForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "experts_per_token": 4, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2880, + "initial_context_length": 4096, + "initializer_range": 0.02, + "intermediate_size": 2880, + "layer_types": [ + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gpt_oss", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "openai/gpt-oss-20b", + "checkpoint_revision": "6cee5e81ee83917806bbde320786a8fb61efebee", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "num_attention_heads": 64, + "num_experts_per_tok": 4, + "num_hidden_layers": 24, + "num_key_value_heads": 8, + "num_local_experts": 32, + "output_router_logits": false, + "quantization_config": { + "modules_to_not_convert": [ + "model.layers.*.self_attn", + "model.layers.*.mlp.router", + "model.embed_tokens", + "lm_head" + ], + "quant_method": "mxfp4" + }, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "beta_fast": 32.0, + "beta_slow": 1.0, + "factor": 32.0, + "original_max_position_embeddings": 4096, + "rope_type": "yarn", + "truncate": false + }, + "rope_theta": 150000, + "router_aux_loss_coef": 0.9, + "sliding_window": 128, + "swiglu_limit": 7.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 201088 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/026fe44014d3f650a32e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/026fe44014d3f650a32e.json new file mode 100644 index 0000000000000000000000000000000000000000..a053e27463473226ceb8cdec2d194848e3d0fb94 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/026fe44014d3f650a32e.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/22e7cd94e28b353a796b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/22e7cd94e28b353a796b.json new file mode 100644 index 0000000000000000000000000000000000000000..dde84d80798b7f59e1d17b569672b59e312b8067 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/22e7cd94e28b353a796b.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/535e7795fb656220e11a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/535e7795fb656220e11a.json new file mode 100644 index 0000000000000000000000000000000000000000..bc818356690ba6e4bdb269c525daf076da1b8df4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/535e7795fb656220e11a.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/644bf9550db2b89192aa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/644bf9550db2b89192aa.json new file mode 100644 index 0000000000000000000000000000000000000000..64b46a0be35816fea66110a4c9c210a4f56b534e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/644bf9550db2b89192aa.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/6501938b047f7d373cdd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/6501938b047f7d373cdd.json new file mode 100644 index 0000000000000000000000000000000000000000..52e41b427977c5543870b482415a564c8b08ece4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/6501938b047f7d373cdd.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/7bfaacc5ae3961c38f6c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/7bfaacc5ae3961c38f6c.json new file mode 100644 index 0000000000000000000000000000000000000000..ada2ce43e2e0614524c0c40a0f26b8e760650413 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/7bfaacc5ae3961c38f6c.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/912ca3353189929b7b15.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/912ca3353189929b7b15.json new file mode 100644 index 0000000000000000000000000000000000000000..7ca45ba7a62ec88880edf379c81c5a9f408ebd2a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/912ca3353189929b7b15.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b2f13b35b2326e133272.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b2f13b35b2326e133272.json new file mode 100644 index 0000000000000000000000000000000000000000..a7217519805c95935a18eae32a0b0b5647fdbed0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b2f13b35b2326e133272.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b4a6b1d49ffe1fb09037.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b4a6b1d49ffe1fb09037.json new file mode 100644 index 0000000000000000000000000000000000000000..b0d206ae3743cdd7446a88e0b4291c1273e4623a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b4a6b1d49ffe1fb09037.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 2048, + "max_topk": 256, + "n_active_tokens": 2048, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 2048, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b52dd6b442b63b75e7b7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b52dd6b442b63b75e7b7.json new file mode 100644 index 0000000000000000000000000000000000000000..d5752ae6bc6ca20199bac9f9fef2b50c76e411e2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/b52dd6b442b63b75e7b7.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/bba41446ac8406c873ee.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/bba41446ac8406c873ee.json new file mode 100644 index 0000000000000000000000000000000000000000..04e0c2877989763094f1b05f5c8028facecb63e0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/bba41446ac8406c873ee.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/c67e5e7f18f7dbf76e0c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/c67e5e7f18f7dbf76e0c.json new file mode 100644 index 0000000000000000000000000000000000000000..7024619dccb654c01547a4c6471ff547f6c44de2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/c67e5e7f18f7dbf76e0c.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d27d49077cbb0bf50eb9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d27d49077cbb0bf50eb9.json new file mode 100644 index 0000000000000000000000000000000000000000..17b5c8a461f9df57bb8bde50dcf20e97cbd3ed4e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d27d49077cbb0bf50eb9.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d59c183509c2a322fd8b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d59c183509c2a322fd8b.json new file mode 100644 index 0000000000000000000000000000000000000000..e8ed57d524b2d43760b01e612d298c13a616a9fe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d59c183509c2a322fd8b.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/f4ff220867f67d4c7b95.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/f4ff220867f67d4c7b95.json new file mode 100644 index 0000000000000000000000000000000000000000..b319029349c498967865a5a00ea12b73f883ff3f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/f4ff220867f67d4c7b95.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/f56d8d1a0bac69205ae6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/f56d8d1a0bac69205ae6.json new file mode 100644 index 0000000000000000000000000000000000000000..095de109a0d1fd656a7498c3164fc49e67e0175c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/f56d8d1a0bac69205ae6.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-8B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-8B", + "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151665 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b0e587ccc95ec0de8a792f7f6a7433e045ac2d64 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff76ba7d15f95d31c450a8344c01924810ab53f588d7ea80b0bb22ed28f06609 +size 628810 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8485c39f23e28ffbb42b6d2b80c28422e896b1a0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_00fa01e7f21f195c4444+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2499d61a87073ae61b4e0906914f9d27a68f55aab1b28e6df1428c6cd6800449 +size 2591744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a5dd685191d9afea66240dbd1b75350a11626e6b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90610188fc62b33403b5d73edcf29cbc5b6d5cbec9a20d45bab4b5bbb232c7bb +size 628841 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.hlo_module.pb.lock b/neuronxcc-2.21.33363.0+82129205/MODULE_0fed968161ee53fb93ea+fb4cc044/model.hlo_module.pb.lock new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1771df7f8d71914a5430ddb9eeff9e791a2f03e5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef78b6be207458b4d706f2b8a8e4ceaf2c439a683b520e876a672295b9e1e70 +size 6660 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c494c76882cb119f1f713e293603c6f8c1f7675e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_12461911462967760525+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99f8cf54feccdb2bebef183894f8c6e77075cd815a89e0c08e0ce3db9d43f267 +size 134144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..aa825d706cfa2611c5d28c73948e471426ecef95 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b7fb767024c9ef8656418b8513c712c59ed30166b6e6611d5a6de28c81cee3 +size 749768 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ca590c017797343089edb40d0149c0f6ec77f640 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e05c4627a699c9e65c19e4bb25270022594b37e908346fc08720cfae707b4af3 +size 2683904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b2b748ec59b13555336fcaddc8bab9984d536b6f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_12a28df006c59e3576b1+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:889f0d022183f793def364922c6735ba6883959b3ad1daf2bb32f8611625d0c4 +size 2775382 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..28a614dd8354257d7f634d3e0d657cd277808343 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cba9308672e8a138b4aa06d80afeb7229d6ecfb63a9919f2b6b13946f9f603 +size 6623 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..049601b35942f7369f9992b4e848db49f1594499 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14192315105465839210+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa37e2f457a6b6e661ec764baadd11bb03dfe0e6778ab0d3a9a1509b9d5b704 +size 144384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..20efce52679a7d1680eaa1781c9d142e75fac96f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff052436875b751e650a21d3668777208e053a4e9b9c9be2f04304560b7f4b28 +size 628841 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c6631e0779bf533912ed73f998ffae5b871859f2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1582447fecd752c400bc+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76f336e6859601c755329ace8c61025a20525cf6956b5de10ec57704b8381fb2 +size 14582784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..350454bd3b938193d89985b9cb9b55bf58942374 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac4120d93751b773be328d798edd0a56cb0d4376a0e87e1fb176a5c5a1630e7 +size 6330 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c59568ad27055711ec44d136e6816ff693e9bba8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1605570184959188488+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ac253c83de2e8ac5f60b8e12bd9ac46376e92a10aa9dd7460c1266da13c65e +size 144384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ace530fdda44a6c5c10ef59c3d9d41127b1c3c87 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b10271b335d17ffd2b2bf2bd40777a153bcb863631e766507573eef23d36ce +size 6557 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5e269cbf7cbf8d943ea488aa797648879d07ade8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17516957483816756456+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e4c97e6d323ec308232a65d58157ab8877f351006dd9ae69b6b1bb0d2e23c0 +size 134144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0739b79cf2e014310efb361a797aa02146a6c8fd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10f96b39a10aad5181d5dcd7450e843a92552de6efb509fd2ba82f6963a11a66 +size 7368 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..98863f1a42ebd6a13728700876c6618781a6353a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17567634454620665439+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6968b2c42d664a955144a22b8b8c3e19185de9173bdaa29d47f7ce1f606adc86 +size 349184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad562357372559d0e31d1af7d9282b37e21be840 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec02c648d7c9149c526ff01b6f19a32d7fac6aa9befe80f90cb4e27d0998751 +size 6181 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a9bebcb79e0fde762d7607c25004507904d1d049 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_249689642401186199+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1768feb6f0aa005dd869879495f1db748438de89346d0441f3ef0964115b3415 +size 236544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..384c2db71231f562c8f20def8517eb124b8d78c6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2073f3027668ae7d71dc2f9882edc89ead16a31bb764bf37cbaad7a729dca46 +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d1b7388f3d3a6567bf18dca11426db0a93e7f376 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2749861b5474cc5ff87c+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3547de72d19b8d4ffc5b613b5bfac89965958fc6b281ecb027163cacf3efeaf4 +size 102728704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6ff226fd3b0f43aafbbf7625884dd71d01505bec --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7c0b35c94f747406a071e251458e4d0537832087ae67e2b4d71bd5808ce1937 +size 628810 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cb0542ec2cc58270f5ddb3f2c1e6dbe8b3a5e8e9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_28ddfe81fe61b9dbddf0+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:505641aad0945cfac0f3cc32b5287b3835a5ab23fe692ce8ece1d4b5284bf04a +size 33680384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..11f79f16f4c1bcf766fedc4fc61f30e63afdb3bd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b449d37e433b2a93904a382f0da6cf173cabb7d4b0bd485f2f9a6a5449bc2db3 +size 628622 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dd70411d47bc4b141f1a494b890f3c76864ae2aa --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a22f3795f95b4bc7eef+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c8343dba458a676c12080bb895d46a7e21990efef72932d912bbb21d84c3996 +size 4097024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..06980d0c7866e5949e1b19f1d7b0f7338ebab26d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96bf84a966cb511e15031b9971b2364c5660f93d3e0bc8fbab5ae6a4c80c2f8f +size 628838 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5955151b803d5b9228ef95a4ce8ba8367648b64d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_446d3822c1ba9fbe18ba+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5316fd3e1b48186c730dd2df2281a8c629d8d28ced1e458a0706f37250c6cd9e +size 7558144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0fd529f72ac7162b740438b83483aacdc08c1f16 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:362e61a1e688b790ba7e1e167518c26bc1b461f2ceffb1392a8b4b9c31cb11ca +size 848917 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ca3ae2411cfbf65efa301c4bdacae456d14ff533 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4f6bc8a0a1a483ba7b55+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:954b9bd0c7c5341645d2780bfa2af47ea278a029683433d50fa95835b6bc45d0 +size 13558784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..58a3cb38e849c5f53d4f987f2a84d927c161a852 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e35cf1be010378c7686985907b8662565b3f1b7bf2c7b2c1b17bed8d61a57e7 +size 857165 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..440107ee717d80afe183e52ba5c8eb9205b23867 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_541e319c815dc74f5f11+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d0bd22de7222c0ff6c0db668999f60f0e9222f00cdbc5f3d2d5d3b2823b383 +size 150303744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4db13944947c9e367a6e498b103352f69e576eb7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff83d3765a26dbf257fa5a33c0b0a477f92c14ef79476eb7a6997e111564fbc +size 675005 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5b660e6e243b87bc061b56f19bcc74fa5af4b465 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdb57e47b1a20cd04ebd57de701bdf6ad14a20ea642dd9dc28b01d42ab9ee22 +size 1874944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..744579c17ae8d812b9b4a8be46be8f14d968dcd9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6f7d67366590b6d1215b+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:604eae1eefd10918b42b50ef8947b5c82f38cd97f3b78a1ab7c2f1ec67d0a268 +size 2001385 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e96b287840f5bbc1915cc06d858b0c805e71e0fe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbab39ac5dbb11baa75ae3be7e58a73294c952ac058234af24a756f51553e4f1 +size 848773 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..284856de305b09624ec06581d1008a79eb436377 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_75cb0c08a4502feae194+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4454dfa9679df92076a305a663ed933d39a386876b846b24e4388a3047f65fa5 +size 5786624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..18a637e127b1fb20ac03a8fc0478aacd73ffa3d2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05806db819569741be840bd7197b583f3ff8a86d99a8a6eb3d9f866fee2539ff +size 628622 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..128f449a3c3eade5603bb64b03e363907f43ab51 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7eba564503196965e26c+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89af9cbc00e1c1dd089d9489abf336c46e232eb75436449da6e2054df5ecf6f0 +size 8653824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..92fa7c78c44b081a901ddde2f749e1479e388cca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a978a7b3f11723c0faa5bb64249d9f7c76d0ee9654b4678d1414486e23c3cc87 +size 730554 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3758d6d375498433bc1ab0da3b59335fe137f862 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_813f4ca5979c12f89e82+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a2b9a93b0216163892e40ef6e3c42c0fdfed3e25d49176b2dcf683a359a3e6c +size 35615744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bf451156cc2ae4a5108bdce48cd976426a45d684 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128abaed1aadd52fd9f95f287daf5aaf3dfe175895448c0aaaa437b83a0d30de +size 573205 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/model.hlo_module.pb.lock b/neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/model.hlo_module.pb.lock new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a3d2a71e618e12d46aca8c8b642cbab8ed4631ba --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b44678afd1a7550cb53901df3c0c05ed7a161b182444f328b6ad05f3ff3471c5 +size 848986 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..152ff5232ae6cb773c9d68b719168f5b2bbeaaef --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8cdf0668abc5ec3574a0+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32c359dfd9fc0152d2e945b00a54494097815d2ac80647742cdf037a07b254b8 +size 52081664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..15e1862cd0b557d2267d36e866d318466f39dd72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d32495197c4be270e318a8d3d3e14dba49b06d1c4370d2a6fa8ea279c55d90 +size 857100 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d22c4a82d1ba9dc74cbe25b4bd5984c7124c140a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93667ce6adf087dd27b0+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7854373fab22668563fa7c8caa7caeee4c5994a5827c6d80920904e6b556e31 +size 120761344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4faff12152ef44e9a01044cf3a1c7d40b419159c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0d891fe50c75a72c384daebff288d9d5a9a6ad43c46b1b5180f22e123905a0 +size 8160 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..33e25e4bd7bd8ea4ec0b895c869bd47cf0c5c756 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9644019637695497943+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a5b751929554f061316729d2ead9ccebbcbe2d833e662278347b2afaecc2b07 +size 400384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e619189e2f5bd74464d609f32aba0a8ddd561bc8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91dc08a4d9c6e0832c20a8f73b8d459746213e89525f96ab5c139102e0741f7 +size 6973 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f221ccfdae9fc68f9fc274e81b2667025a9570e4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9847227195640834860+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5243652a9eb2a2cf58373500ed3ecf38d8f9d39da8d585c4f695d2bff357404 +size 287744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7859098f88e6d409f2d9f47833efdbb82a11d4f4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b771e0305da6ca5b62d4c57e4479cb29e949987fecced245b07edc7ff8fc8b72 +size 850786 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..10594a3b6c0d3dbc8e8961f115f77df5c974ed77 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_992f9d164a8d61e4a43a+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66654f75eee719f128b3433eed15e15577835c72f8e4e6e24540ab483bc0d553 +size 41872384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..397a31c2d25d279aaddaeddfc72c18aa3cd9e85e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee38efece08f60c8b1f87b4c3e5079b110b1e9ba3cf971c8a7a2c1f53302b8c +size 628841 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4f6a15f90e5d29b3e96609e5434debda6dbaa8d0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_aace4e3c5cf3d468cf2e+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e4f846f25768eff5ced8d202fda4851705c839d51ad40e2c397bd08ed09668e +size 16692224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4b958b6cdebae0e3c0f6dbb6ff87879c2ef6c5d3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0b336754ac592306aa449e1ba96ad4dff08dbd70935542a10e13e04361ec42f +size 848986 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e238d0f45ca4d74b8f542eb073f591f1f3b02d0d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b3ac1cf6845fcd156f52+fb4cc044/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f48e620ed9d06d9441d74ecb55dad062f428c9df80450482ac76d1f31ee9dc99 +size 21320704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..22c9d33be1f67a39584291fbf451e8b195d7c6c2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:708de93ab2fb11c97e0c5972a57510693eeee2012515d4341105a792518d2364 +size 757413 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dea9ac998f78ed909e9513e13c11f77bec38a4ee --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bd2f2fd229df31e1d6fc+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:187868ee3e0156001bcf75f9111c7381352668212ee3d2554e559c26b78f98b8 +size 35697664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2fcb49c0c21b3d9d8f66fbf8d1f45562e320064b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5b8ce348e1c699ad514c10dd2511e5610b2ae74913222d60baa3a3cd30c79c5 +size 648764 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8169770329f16b0d37c7a6cd5e535d1069c71596 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37ec5aae825bbb20b006d2eae101a0361dd70f8157bb21bf31eb5bee572bf567 +size 1844224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c54f86da34bb487dddd7bd98633c493dffc4fd94 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d4b74a4b48943996d251+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8aa5f130851596ea3a75bd7f87a981012ac63364990a2d8ee89dcb1e1ab18d +size 1970833 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5d280d241359b9925eb3bd2e0c057c1e34c74216 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c86b047febda300dd125a4df08434cf027a1b6b58ad8f5f5473d014a0a56a32b +size 1735094 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c05893476f3e3f4226a6d22187f7fdea45cc4083 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fc8e09ebb5e540fef7a3+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b5d232e94fda61dffc4b2dd118ec814493c5d46284f8f2e05d4d37b89e78cca +size 9288704