diff --git a/.gitattributes b/.gitattributes index 78fba9eb75092e021cb07bedb37aa5423ed32ea8..15c27e85b701cf06a7bd837fb2dff066e5a05890 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16503,3 +16503,52 @@ neuronxcc-2.21.33363.0+82129205/MODULE_fa44a5acf289ee1ecd41+8ee4e2e4/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_fa44a5acf289ee1ecd41+8ee4e2e4/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_e7fddd20b107d5347811+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/8a4430107c8609f149e2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/8a4430107c8609f149e2.json new file mode 100644 index 0000000000000000000000000000000000000000..ded2bf1372f02a80500d56b67a2afb26bd2452f0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/8a4430107c8609f149e2.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/de33f9f75b62f508e84e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/de33f9f75b62f508e84e.json new file mode 100644 index 0000000000000000000000000000000000000000..7f93d56f14957dfee8d3860916fc3c3a83677f5a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/de33f9f75b62f508e84e.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/8d757b039a90a33816e7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/8d757b039a90a33816e7.json new file mode 100644 index 0000000000000000000000000000000000000000..79ae10ecfef99d05c1f6ca1270b5aaa5e8a3a302 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/8d757b039a90a33816e7.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/287ad99b2a6141ebb12e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/287ad99b2a6141ebb12e.json new file mode 100644 index 0000000000000000000000000000000000000000..0cfab2e23ac7c049d4e6787e9d5529cce1e66430 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/287ad99b2a6141ebb12e.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/7911379237ca2e32dc36.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/7911379237ca2e32dc36.json new file mode 100644 index 0000000000000000000000000000000000000000..b7abc9d3dec749f8285ac39320bdd9f8c20af6cf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/7911379237ca2e32dc36.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/0b9d5658f10ecb783ea7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/0b9d5658f10ecb783ea7.json new file mode 100644 index 0000000000000000000000000000000000000000..e9559fa9ac0fec3c91e2f208f3c8626b0da21869 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/0b9d5658f10ecb783ea7.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/3aace2df87913058a524.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/3aace2df87913058a524.json new file mode 100644 index 0000000000000000000000000000000000000000..1f754fd35c27e7b80126e0dbca8eefa1728627e8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/3aace2df87913058a524.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 6, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/8ff57fd8c6031855b0ec.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/8ff57fd8c6031855b0ec.json new file mode 100644 index 0000000000000000000000000000000000000000..02c7f33ef19df04c04a5d883748904bc6627265d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/8ff57fd8c6031855b0ec.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/c100f806a6006feef39b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/c100f806a6006feef39b.json new file mode 100644 index 0000000000000000000000000000000000000000..c9b72915e2e28c02839a395a223e356f054db373 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/c100f806a6006feef39b.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/3943e7730d04a5ca4a9d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/3943e7730d04a5ca4a9d.json new file mode 100644 index 0000000000000000000000000000000000000000..c921acd648c5f2aea0db65ebb0534a92fc25e40f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/3943e7730d04a5ca4a9d.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/fa7402288853e31635c0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/fa7402288853e31635c0.json new file mode 100644 index 0000000000000000000000000000000000000000..f1d778e317c7d626335dc45508001b8130db5111 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/fa7402288853e31635c0.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/5b6c82d29f13b10e5052.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/5b6c82d29f13b10e5052.json new file mode 100644 index 0000000000000000000000000000000000000000..a70b2bcb1795cd93fad0a29851ba4e2c9a2c5f9c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/5b6c82d29f13b10e5052.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/c4069e9e5ecd272fe629.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/c4069e9e5ecd272fe629.json new file mode 100644 index 0000000000000000000000000000000000000000..0c3d5cfbda9845ed89e3fcc5564c338b3fb06603 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/c4069e9e5ecd272fe629.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/262dbf43da8b9b831afa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/262dbf43da8b9b831afa.json new file mode 100644 index 0000000000000000000000000000000000000000..242d3b7ea7c48a5c3a9b586a71268dd0ac127fe7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/262dbf43da8b9b831afa.json @@ -0,0 +1,80 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/a5218019da46cb49d020.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/a5218019da46cb49d020.json new file mode 100644 index 0000000000000000000000000000000000000000..59a2b9c1746fff9fdf8a708bbbc36df6a94f14e5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/a5218019da46cb49d020.json @@ -0,0 +1,80 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/c5893ac2150961103e95.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/c5893ac2150961103e95.json new file mode 100644 index 0000000000000000000000000000000000000000..3698ecc698eafafe478872415993996b43e26ce3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/c5893ac2150961103e95.json @@ -0,0 +1,80 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/02e21fa4187e1718bddb.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/02e21fa4187e1718bddb.json new file mode 100644 index 0000000000000000000000000000000000000000..faf3c06174201ccf396870279b6a8545ac41750f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/02e21fa4187e1718bddb.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b348680583357cdc479f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b348680583357cdc479f.json new file mode 100644 index 0000000000000000000000000000000000000000..c585a54b7f2df5d16d4d0bf054a463c0e508cf22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b348680583357cdc479f.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/gemma3_text/google/gemma-3-270m-it/c5893ac2150961103e95.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/gemma3_text/google/gemma-3-270m-it/c5893ac2150961103e95.json new file mode 100644 index 0000000000000000000000000000000000000000..3698ecc698eafafe478872415993996b43e26ce3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/gemma3_text/google/gemma-3-270m-it/c5893ac2150961103e95.json @@ -0,0 +1,80 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/gemma3_text/unsloth/gemma-3-270m-it/8d757b039a90a33816e7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/gemma3_text/unsloth/gemma-3-270m-it/8d757b039a90a33816e7.json new file mode 100644 index 0000000000000000000000000000000000000000..79ae10ecfef99d05c1f6ca1270b5aaa5e8a3a302 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/gemma3_text/unsloth/gemma-3-270m-it/8d757b039a90a33816e7.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/granite/ibm-granite/granite-3.1-2b-instruct/8a4430107c8609f149e2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/granite/ibm-granite/granite-3.1-2b-instruct/8a4430107c8609f149e2.json new file mode 100644 index 0000000000000000000000000000000000000000..ded2bf1372f02a80500d56b67a2afb26bd2452f0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/granite/ibm-granite/granite-3.1-2b-instruct/8a4430107c8609f149e2.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/llama/unsloth/Llama-3.2-1B-Instruct/b348680583357cdc479f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/llama/unsloth/Llama-3.2-1B-Instruct/b348680583357cdc479f.json new file mode 100644 index 0000000000000000000000000000000000000000..c585a54b7f2df5d16d4d0bf054a463c0e508cf22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/llama/unsloth/Llama-3.2-1B-Instruct/b348680583357cdc479f.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/phi3/microsoft/Phi-3.5-mini-instruct/3943e7730d04a5ca4a9d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/phi3/microsoft/Phi-3.5-mini-instruct/3943e7730d04a5ca4a9d.json new file mode 100644 index 0000000000000000000000000000000000000000..c921acd648c5f2aea0db65ebb0534a92fc25e40f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/phi3/microsoft/Phi-3.5-mini-instruct/3943e7730d04a5ca4a9d.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen2/Qwen/Qwen2.5-0.5B/287ad99b2a6141ebb12e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen2/Qwen/Qwen2.5-0.5B/287ad99b2a6141ebb12e.json new file mode 100644 index 0000000000000000000000000000000000000000..0cfab2e23ac7c049d4e6787e9d5529cce1e66430 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen2/Qwen/Qwen2.5-0.5B/287ad99b2a6141ebb12e.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-0.6B/c4069e9e5ecd272fe629.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-0.6B/c4069e9e5ecd272fe629.json new file mode 100644 index 0000000000000000000000000000000000000000..0c3d5cfbda9845ed89e3fcc5564c338b3fb06603 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-0.6B/c4069e9e5ecd272fe629.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/0b9d5658f10ecb783ea7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/0b9d5658f10ecb783ea7.json new file mode 100644 index 0000000000000000000000000000000000000000..e9559fa9ac0fec3c91e2f208f3c8626b0da21869 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/0b9d5658f10ecb783ea7.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "feature-extraction", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/c100f806a6006feef39b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/c100f806a6006feef39b.json new file mode 100644 index 0000000000000000000000000000000000000000..c9b72915e2e28c02839a395a223e356f054db373 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/c100f806a6006feef39b.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..407e872a434b1aa61a82adcf8caba390ae19c5a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9987727d12a57032344d324fb110355306256fa7cf9ae2f3cdf4defae516a917 +size 2191456 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0118bd8726e00dccae03f216e03cf125affde76a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e8dfbbde719dcf9a38a+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca11058a95249aa7afb89bdd1eab8b2e60a25197e2c274d262a678323266bc0 +size 10415104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..464a68d5bfaab51945b14d4f5a44c169c0477d7e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ab435dc42282d2ebc9f6e2ce2724fa2b653abebbdd34f2c9f5ac5889d106478 +size 1920963 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..543b3ae152a74f148723bdacac0119cf32a5ce1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_164f48a2a338a277ba9b+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a702901a3634de3ed8a3a01b4f35401c31eb58d34225c493f5f5d0bd6a75e0d0 +size 35134464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3d7a2de11e11e82268a06cd2cd6dd70dfde43788 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1fa745a77335f51d1a4f2b0d7217a87f2102be770422b4fff4108abf40be305 +size 1746746 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e931cf1973e2a3abbd83ce05462c71edc9e7c695 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c716830cfeb2d092208e4deb28828bc6fa6bb1c96cb57ee961f52e3c751b472d +size 2018304 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..55b98f5272ffcbcce2cff50edacc09bed51c0aa5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_169cfc2b665743de4e2b+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5947c5beae4f2e2d7a7b5d403feae2a75e755e99b01ae5add1987d5f80dd182 +size 2174610 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..939976050137ca11fc0f43ebcbfecb38d603d209 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffb2c4769fa4c70ec3f6f2f6a9b4ee47654614a059cf32131080aef740bee558 +size 1946249 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9fe183a8a4b440263bf2759a87e2eab162dc7a16 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bfcded84a852029984418e4a5f9457c876961cd473586cf0d2f02ec04014b33 +size 3738624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..eb10516c926687fab525794cba28efa270c8c72d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_18ad931622c6ffd3a96c+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d7a21ca2e7b095ca207fc77c298f79cdfdae2c584639366ee3224c7cbf2c8ae +size 3906168 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..911f235f25e250f4fa29f600891fa9aabab7e9ed --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcc72101146bb93b226e94d352e88db2d1725fcb2a9c431cf17f35e86718f555 +size 1704954 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a636493cf91a9d17269b2ea2d8908f0667fac270 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1d5235b31814f7b0b086+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eaca1c813cd8b22f99d2aa46c641493affc50f5d42b82090d885e5d3816dfbc +size 17183744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..22f1daa90ec08b707402c94b3c3c29b0c9351563 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0440903181d2c8d728444c0da2a86092742d5c3d23a3f898a5bb041c8264138e +size 2644415 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e7bbfc8f267db2e84692ff6bc61303f88f662ff0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf5847ebb7eee863c82332518906fd4fdf97f06dea9fd8c5bf0c10d969975db +size 2520064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..affb21f416c9503523afd9a93c9032a1736561f6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_248be7993b4edc8d46c8+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b046ede718afe412f36f20e4e1e458d837d704f8c9c76c69d427e90a5cb202e9 +size 2642335 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c78b3b3da0e74caf73a420567c2219d3eaffe4da --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:515923c3030d2428381c1e339c579aa8c1bca8c13c4126d52b584b403ea7f239 +size 2471814 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aa859b992305da716c4a28e7b1e567cdb0daf064 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f88c02ac05549bd61ac3eaadac45d919c5a2503d49bf8f9ce2db019a290b98 +size 4465664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f66529bc5211ae9fef09fcffbf53c02f68202440 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_26a5b03803fa3c302f48+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f1fe13c618c3c040c37a95b308340e5edee22cd8697b6e9cf5edcd89721a3eb +size 4633208 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..445786e26b52715c86d542d43e297b3cd9ae5d52 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ace86c54e1b508276b79c88d3bdd18e9555be4ae83ae564fdfe3bf9187ffe4 +size 2114263 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..972a22c3cf9c73f1f43eeeffece4c0300b44b966 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4c7cbe55e26f80b4ca5e+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb2a1b54911d1eb250b3338e9110052d2627f7ccafa519366787174f97bae94e +size 15719424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8ce68b63879b43c546f9cfe5a82ea6f17b0b062b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7bc76a33d8e31760ee49af87409f48aa5c049440dad16d7426677ad5bd32cdc +size 2343413 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b2bf2f825715496a0e81269f9274c6790c00f427 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ee58a69734e58ed13c6b9af4fff8a5879e424f21ef9a30584fae2eecceb4692 +size 2724864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bb6478af127b2ec11b610573bd5a60dfbb484f05 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4e741b6c6aff91bf0868+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a49b62daa4c799e015006c2e7df6af1e6b979b8fb016690c5c9eb1a8e0a8994e +size 2881173 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2803bb8fe0a7e75d5f3b483a9b23098fe33602ec --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af7df2ce11df8830cbb3866b5c793e0d903d22b41d5155b67a40afe38fd935b +size 1716991 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..890a392ebd0d530baa0eb56eb4630204a528e0be --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:230fd54d965dfc4e5d8884f993a011c81452089b354a760a0105b845d16c055e +size 1629184 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2feb19c92530de41873acaf11fad136166a719f2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5017b58825859b75d379+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88cd7128c441a75586b7b6c1b826e89a91406f4290765352c1d41405a0e38fe9 +size 1751074 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3b1cca7c568e0f57dd73f5e31a6bfa7443f57eac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22769c87f72c90fd439683f56b9efaa671e712ea32aca01943e437178fde6153 +size 1690192 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c0da5abcb7397f86676df9084f837ac0065ae258 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccba59eb77a0d95e4a46527bb744d3251b3a041bce891966f7fbfd68b75f6e17 +size 6851584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2d0ec6323eb86d683877e12b0f03338c56c24f6d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ffb30cc6ba169632496+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3184636c826bd1ff2080165a85c4c0899fe275d5d22a9140dc739a02f2c1cc36 +size 6984931 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ef452555565e910b22b18d22dcc6f3f00e11a476 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfea97926e54091bde1206263e3bda8511c66550b1554b9eb3384d58f9c8dd59 +size 1615639 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b6b9874d54d6e77930ebba92063a7d540ef6803e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_64c05495132d4c3346af+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5719a237ee56d261d2a794ea271f08c62c35e5c5fd19959a5f799f9d6c76a935 +size 49542144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c5324829ac21db80720d602a2184bc067b3652dc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eab7c9bebafb09b9d5c0f93feb245c4732e0e55fede342306184924766f30bc5 +size 2055271 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..df2c658bcdaaa695ca365abb44b32a2fa47bfb69 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f85fa81b12cece867d7254cbdc4720035dcc4ea3adf3cbaae66c7c3bb813a5 +size 4148224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ee6a2b49baffedeac454cb59f872c4dacfc715db --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_688a7baa828603f83864+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc022d078c71dcd7afb99c72befb9da8b3a15f685f654f7fe94331477c26613c +size 4335766 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3388799cf22849f0905f5e1b49734db0adc9b0e6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73fe6553148e7d2a0ab0ada62e5d847c6d2bf4f3667faf12979c1be0617e5d4d +size 2458086 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3112d6bb7ab2e38da13ff4c55a86ba710f89e93c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95f820ddd3d798c1c2ed974e858011a9f764a892131618a3b92339406295def +size 2970624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cbb54a2af2bf094cfadb36a971ee8bc31d2b7826 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_68cb8e9f3c2305481f3a+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb2be1eb196bff8c250a6a3cce6e418dd7728f5b499708164826ba78011a94e +size 3107400 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f3e82e2ac80fae462339e8dc1d9ac9f3ddcf02bc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2a6175e9066d90c893cb4de5c58a0cf100b67bdb2e5b4eb8d503b6c6313041 +size 2344505 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8b2b84d207f1eca00e441cf0f6b43ece1031f4ac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6019716bbd4655c756b17045ddff6c61319863cf3acf8e53758f41f4489079e4 +size 5315584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..de94c379db69528f65d447e3c6bfdc42c425aa74 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_749cb1da17f656c57ecd+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:746188331cf6ea5ba767eed8ea3ae95a6163c6583c2102a12299cb453e05d102 +size 5503126 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..339a750f96d2eea4030e1cf94b67fcfa36e7feba --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b111c203625ee92ebbaf6ee8df90c7e889d9825c947fe935ecceb243a9048084 +size 2021280 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..127546aa70e87058e8ba2d8f13e554a347ce8239 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7a50c1d04b1b5328b107+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeafee154f903b97833b8f63000ba5748f9c358f0c46784bbc06b2378ad45542 +size 44114944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dfb51f62170b5141bc589ecb3ce50e04aae5cf50 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_761fd3fe-ae51-433e-9c32-c0cdf4837bc9/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6ce9ea0ff4b9b1e89b70cd710ef1d06794aa12fc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dee85e266cdcd1428906abb6ea0e93d890c5ece20051a719cfb1578739dcab83 +size 13463 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..658477f714f7b1cdabf0c80f63eaf4419d2e4cd9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40cac2af2c88f539ec7abda8e2ae943e01272ed8303717e666fca1ec06fd48a +size 400384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cd91c2ed3ebc9fa02dffd84d56b9096e95a8bdf5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9b43351b46acfc35f59c+40f75c1a/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:954289a9bb5d050dc68cb764c03ac2ebfcf0880fa3413a8d62e0b5c52aadfe9f +size 404343 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8483b276835238e56db3c5b257403ed94c3e2506 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ff5311ac9a54712fa070eef63231e064237d7a1a13f0bc9fd466fca08b4c91 +size 2004696 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..01127a138bb0171796c7269db64e5640b8ff9ad2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_afeee4236e029e2d644e+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03eb5c210e2d50f6ed56ae4d7a0571867f2baa3640903a78e2046ad66dfa67be +size 2223104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9c898b209d40e4320bc64d97fe49c3c9ba64fdc6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ba098ae0e6d08336c9be8f2c36a2040c57bf742dac23919e9924876f3a9b8d2 +size 2039561 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c718e913981239bfc6bdc32b56a028bc30a888ed --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60303567db051d773e5a1d158b49a9625f5979642b9dd04694dd79209956d797 +size 2601984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1931eaa743d26985ff79a4aff2604ffb7b13d312 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b0b5c42826d33def683b+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10b71f6957155e26782b150a9e005b8359c098169ce81c21aff8e30522bf06bb +size 2677427 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..09faaba6bbdb2231024f19943145dc2c014a0895 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f21028f4fabc9334be20f2f226fb09f1c4bf74069aeafad4559eb66e645795 +size 2027205 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..06a9a62f304c2e3afd614ca97194f45e0b82c8b7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11bac975330a592e50165d10e987b0d50a020af819704ab417dacbcc3b7a5715 +size 1618944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2ab21ad5b1cbc3311504b3ff7133f5945dfc883a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b555a5f93b2d6c2b964e+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b114d500c8bc7f12b4a38eec2c00dc8d3774bd4983464678193ab8d1fb1588ae +size 1741466 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..298bb4a1a4811b2c10e942f460c439ac1005a8d0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2175887963360733b1565a753b4f7fefd51802d0366712ff0ddd2a58864f51 +size 1650539 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..755be08e2148817c03d287b3826e7e91a80f217a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b61f26bf83def4e7a52c+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7519b2111b9a25ece6521272ada9723bf3e8e11a2571e32b3e0f0e6d3d408646 +size 18340864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..349c8e6fd21f3a37c617af664bf977f4691b4ac4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a95b95cd8c93ecd0bf4b2bfbe9b35d6b83c514b28c88c60e79a20abecc6e5b4 +size 1861448 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2d6547bdf34a3693edbb9b829a77a62a0ad51cee --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af72539f02d04655697a304cc32a4664aff9e31e31cb6b713c4458cc210de4ec +size 2356224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6505962cc7845519cfede99837c6d91d4f96db6f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b95ab17ef3954bb04026+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37c21dc7a840431b7da19d754ad7c79c6adfd58ec21d83a1e812cdb32d5ae97d +size 2493000 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f85cb693895cc58e0dce8df0bdf7daed985a70fc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a597ba392f32fa74c6dd19878ca60b30e654816519f1f54a05fa02bad0bdd39 +size 2269712 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..110e1b5566fd7a7a051bc71f0fe3a273c4014137 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bb55057d1263c7ecc9f1+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe2bbe7cfbaff44a575a1986cd4085a36d70c72b9740e8666c06bb7a7230b6cd +size 34028544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..02f33dea18490cbf5c8ab2134a0d1a888f235fa1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:425fa5d1e35dd14bc9c2db25a2f71705f9434e9801e477aa3dee55b654f61164 +size 2027358 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5f106aece012d003194cadd14148411087271d68 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bddb314edb25866ecdcb+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39025df6a52e66d77e818a7c53be63ae57d4fd32662120370be173f036127ebc +size 55460864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5add1492dc307ac347dec580834d330325b2ce48 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16f178d9de1b01f3d37eccc0f9889281948aca4c28835d81354a841bc372133 +size 1932074 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4077babf05fb8aa50c390b4ea664ab4df2343d4a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_be944b6c4fa695088f62+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055fac3c3fcd3288eece2a5c28f5ad39d846d1992c9bbaec31e923607562ef0e +size 6411264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..28bb2e47e752e41a2c238ed877ca0f7c3879dbd0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a3f589f2fea84c4065bc7f18ed4f88838757549ce6a3538a5573e097265845 +size 1951338 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f2d7e2e1e44b0ad2c02fd7ce0097d430f2643d6d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c0eef7e60449b5b8b913+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2937ded97d854ffc8669e9f5e9fca5cddb3317c99ce84dd829aba935c663739 +size 5889024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..204be4c106dcc6a56fdb07ad48985f2e9bb2c3f4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b8113833cfc628fec359a61fcce92bfc0fdc583d4060e96cc283c76db85c01 +size 1744654 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..28a5b2e481953b66289f6a1fc40f4943d9741fc1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c38e6e659433b3e219d8+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:185f5589461b6ec0c4e0dd4c176da8282fb4de544b1a9ef27d0e7474abd727b0 +size 6329344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1c111dd280ff4237effb41420236eb9de2c43c60 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9f2a2b49fa2d25838aa3e4579d685d18c784a794 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f33bc61f8c0d50d219830cf514b4da394970c3150c5d27284e90f1128913f11 +size 767652 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4c670c1136896cab1097dd59a7e78236e8e3a2c7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c96e8a76473cbbe30609+bf3a5a22/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3cba2966f730e86a8bc8d3df186702bb9a72092e3d59ae902530ee7210770d6 +size 162857984 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1c111dd280ff4237effb41420236eb9de2c43c60 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..17916bb34591aa6c3d03e6ae5f631a9a1a9ab2f8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f3a418fbe799a8b38d7a64d54f9ae7d3bd5ab24584339e75fbba1a9a04e9506 +size 768996 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..314ef89672b470a4bbe46e70ad198144d9736793 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d130c645df160dbdf55c+bf3a5a22/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63bc6f21112c685ce1f72b40eeec97ee9e2c8d02efbbd6b6cffe7e07bdd7589c +size 11756544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a66dfc3cf8a8afd857ec45fa89617e4e6ae731f8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0958c9acce8a6a0e11cbb81831889fb9d8df395f26441cda8556b50b252bc874 +size 1513927 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c271fe4da145829e56669b2655144b9172e06ce8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b1ed6a18d10e9d9911d4fb1ff0fc939fda464ff72288384e149258a9413e72 +size 1936384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..eae2d0362967877c2b062230ba5bd3b0ab9d4723 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_db2f47e16eae1f5df700+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46354e64c48db195e7edf82cb09cdb3fab69f099a81799f13846cb9b68b23f89 +size 2011824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..48889be4d2964ce409a78e3608fd675dda838f07 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:913d803179385c7596d13fc4187d4e1a9166140ea6541b416a9a6f48374c67cc +size 2053375 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d6a352d0d81b3435be9afb506e4003e8a840f483 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ed645ef81921b62e3ba7+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b8d50dc0f2b2dcdac0fc778649acc6ad34c3aadbf6ad6642a681c61947a592 +size 41411584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e5ee8962516aedf2c8ba9554c8e636316fede6f6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2546f249fb2184222ccf4d3dd0c9aa9abf19acae8612fee8b8a5a6c0b77251e3 +size 1973263 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..835f4879c3f4a1585773e801b53fe8e61b732e1c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eea79ad4b5b63ff68d1f+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf323e1213b796128d009e13403f0c09144da827bf548b94882490b19c1820c3 +size 13804544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..93f48d8f9626b516d7a9e9eb1c9b3029a99a3ad4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d59035e30bec3df9857d76d4fbd10f1c7e3ffab81ec079cf73d5504a4af88f0 +size 1927879 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..91be1235ee225fd01ff29e16565dc287bf56a102 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e4c53abd3fff1a01b73cce45e350abd64afc47e0916a587e5237b839f9caf7 +size 7834624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..272cc1a87c5c069c4a06273e653fc1be7a8b25ec --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fffa4c481b28ac601395+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e42202c0cb6c9924cd0a0733007a8324476b8abe42361891e63c5eb76f9e718 +size 7982767