diff --git a/.gitattributes b/.gitattributes index d71b2329f1406aac0fb1cd085e62dae2ca7b4ebb..1f31564effbf5305e82c34f9c623177b8f49cfdb 100644 --- a/.gitattributes +++ b/.gitattributes @@ -15552,3 +15552,15 @@ neuronxcc-2.21.33363.0+82129205/MODULE_f22fe1c9b82b6357a712+4a958caf/wrapped_nef neuronxcc-2.21.33363.0+82129205/MODULE_35f89dd578b938fdd291+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_35f89dd578b938fdd291+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_ab64b202fcda3e011f28+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/440dcb82274f114a66ad.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/440dcb82274f114a66ad.json new file mode 100644 index 0000000000000000000000000000000000000000..bfeef2099ec3ad60f2831beb3084cb506ab19dfd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/440dcb82274f114a66ad.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/9bb0e82ff320f6e2c0bc.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/9bb0e82ff320f6e2c0bc.json new file mode 100644 index 0000000000000000000000000000000000000000..3a4de6167df23c2b011ab65228d49ac6b4b90f68 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/9bb0e82ff320f6e2c0bc.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/4925f3a03aeddc47881d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/4925f3a03aeddc47881d.json new file mode 100644 index 0000000000000000000000000000000000000000..e6f068b0fe52d703e91b6f364c1905a19bf10ec7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/4925f3a03aeddc47881d.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/93ac0e7f7bd685a9d58b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/93ac0e7f7bd685a9d58b.json new file mode 100644 index 0000000000000000000000000000000000000000..465ab5f295b0d998b3191d7b84c7536b64e1f0a9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/93ac0e7f7bd685a9d58b.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/34454e3efb576d636921.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/34454e3efb576d636921.json new file mode 100644 index 0000000000000000000000000000000000000000..430aa173e531a9e467d36caf1986374eba1f1afc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/34454e3efb576d636921.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/5ec0bcb03ab4e2ad844c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/5ec0bcb03ab4e2ad844c.json new file mode 100644 index 0000000000000000000000000000000000000000..8d2e6bed31d2964aa423f3f240a5d43c8000ba3f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/5ec0bcb03ab4e2ad844c.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/df54a6f9b56ca3bdfc41.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/df54a6f9b56ca3bdfc41.json new file mode 100644 index 0000000000000000000000000000000000000000..2fde6e4d220e436927df4c4a6a1e206a3e8edf07 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4cb7aff9e2a15c151396f2b684013e39d6739f0dec83e5c9dabbfe9d5fcf77b7/df54a6f9b56ca3bdfc41.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/2c2bad80f80ab55a2d84.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/2c2bad80f80ab55a2d84.json new file mode 100644 index 0000000000000000000000000000000000000000..49dd4673430e4954c3083b91bc4d6e20b27d8952 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/2c2bad80f80ab55a2d84.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/9c8cb869b5d7e2540fa0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/9c8cb869b5d7e2540fa0.json new file mode 100644 index 0000000000000000000000000000000000000000..2b83960388de0f1c0e93222fd6a52e9a36d3db15 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/4d281af1651143f9a83b42af25d9f0a533dbbd9c3441537127ae772fd3e399cc/9c8cb869b5d7e2540fa0.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/12ce6ef4377bb2672028.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/12ce6ef4377bb2672028.json new file mode 100644 index 0000000000000000000000000000000000000000..c7fadaa4b1e8ff46026b5faa7984906b8218c1b7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/12ce6ef4377bb2672028.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/5997ab86579aadcc5cdd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/5997ab86579aadcc5cdd.json new file mode 100644 index 0000000000000000000000000000000000000000..5ae9543e704017ac91b51850237966271e6fddf3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/5997ab86579aadcc5cdd.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/f85a0d1bea066bc1274b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/f85a0d1bea066bc1274b.json new file mode 100644 index 0000000000000000000000000000000000000000..66d3f7b41dc160fbb6d53f96f28d8a6cb97267d9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/f85a0d1bea066bc1274b.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/1a86e1cfcaf229143d5c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/1a86e1cfcaf229143d5c.json new file mode 100644 index 0000000000000000000000000000000000000000..34cd2fa0bb4cbcee0773a8ad0dac1602cfc655ae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/1a86e1cfcaf229143d5c.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/7144c43faca4fea18b01.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/7144c43faca4fea18b01.json new file mode 100644 index 0000000000000000000000000000000000000000..ab7da55f773489bc710b80303fe842535894d180 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/7144c43faca4fea18b01.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/eb6ead215819eab7b04a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/eb6ead215819eab7b04a.json new file mode 100644 index 0000000000000000000000000000000000000000..1b76b3b609d91c6b9b15177b879c038fb9fd506d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/eb6ead215819eab7b04a.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/a037d3d5582033b493b7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/a037d3d5582033b493b7.json new file mode 100644 index 0000000000000000000000000000000000000000..cf30f4ead11eeb095d64647a4dfb4be775176122 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/a037d3d5582033b493b7.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/8cefa5ff9083418aa040.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/8cefa5ff9083418aa040.json new file mode 100644 index 0000000000000000000000000000000000000000..1c26f078f1a864d944db73933888f7035150c591 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/8cefa5ff9083418aa040.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/9fbd66c92baabc8b5ad0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/9fbd66c92baabc8b5ad0.json new file mode 100644 index 0000000000000000000000000000000000000000..24bc9089e10170b33e480d0c95770c1b7fa5993b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/9fbd66c92baabc8b5ad0.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/ada37737b07aadde24a9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/ada37737b07aadde24a9.json new file mode 100644 index 0000000000000000000000000000000000000000..acf5254d099cc5120918c8ffcd54a4624276f2be --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/ada37737b07aadde24a9.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/97503f4e2dca15bd6721.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/97503f4e2dca15bd6721.json new file mode 100644 index 0000000000000000000000000000000000000000..d5ba311d1346f4445e53c02831e6dd90c303dc87 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/97503f4e2dca15bd6721.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/a7d7ccbe80fcc54f090d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/a7d7ccbe80fcc54f090d.json new file mode 100644 index 0000000000000000000000000000000000000000..d59e99d697abe8dc928d51c3f30fbad9823749b7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/a7d7ccbe80fcc54f090d.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/0aa29066680da68d477e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/0aa29066680da68d477e.json new file mode 100644 index 0000000000000000000000000000000000000000..711534a6b369b095cfc0812a14893bd80c110fbf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/0aa29066680da68d477e.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/4aab355cf64b87f1cff4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/4aab355cf64b87f1cff4.json new file mode 100644 index 0000000000000000000000000000000000000000..4c2d5449825097535cc6448bf878eb6fd9f82214 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/4aab355cf64b87f1cff4.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/72eee6f943925a38948a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/72eee6f943925a38948a.json new file mode 100644 index 0000000000000000000000000000000000000000..82bbcbc3e67c461b02bd998ddc5dcbd8536f7f8a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/72eee6f943925a38948a.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/c4a5702b42ed466de9e7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/c4a5702b42ed466de9e7.json new file mode 100644 index 0000000000000000000000000000000000000000..f771141b3c91f612a6a8440878872810aca2006a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/c4a5702b42ed466de9e7.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/f18d2067d0df4f48c0c4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/f18d2067d0df4f48c0c4.json new file mode 100644 index 0000000000000000000000000000000000000000..b09933cb934911dd7e5eaebb310543a8ac5ebe18 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/f18d2067d0df4f48c0c4.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/2eeaf9c9a7977232515a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/2eeaf9c9a7977232515a.json new file mode 100644 index 0000000000000000000000000000000000000000..044257c233e4796b4908405e9f09924d54cad67b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/2eeaf9c9a7977232515a.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2eeaf9c9a7977232515a.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2eeaf9c9a7977232515a.json new file mode 100644 index 0000000000000000000000000000000000000000..044257c233e4796b4908405e9f09924d54cad67b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2eeaf9c9a7977232515a.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/granite/ibm-granite/granite-3.1-2b-instruct/440dcb82274f114a66ad.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/granite/ibm-granite/granite-3.1-2b-instruct/440dcb82274f114a66ad.json new file mode 100644 index 0000000000000000000000000000000000000000..bfeef2099ec3ad60f2831beb3084cb506ab19dfd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/granite/ibm-granite/granite-3.1-2b-instruct/440dcb82274f114a66ad.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/llama/llamafactory/tiny-random-Llama-3/5997ab86579aadcc5cdd.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/llama/llamafactory/tiny-random-Llama-3/5997ab86579aadcc5cdd.json new file mode 100644 index 0000000000000000000000000000000000000000..5ae9543e704017ac91b51850237966271e6fddf3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/llama/llamafactory/tiny-random-Llama-3/5997ab86579aadcc5cdd.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/llama/unsloth/Llama-3.2-1B-Instruct/4aab355cf64b87f1cff4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/llama/unsloth/Llama-3.2-1B-Instruct/4aab355cf64b87f1cff4.json new file mode 100644 index 0000000000000000000000000000000000000000..4c2d5449825097535cc6448bf878eb6fd9f82214 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/llama/unsloth/Llama-3.2-1B-Instruct/4aab355cf64b87f1cff4.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/llama4_text/tiny-random/llama-4/a7d7ccbe80fcc54f090d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/llama4_text/tiny-random/llama-4/a7d7ccbe80fcc54f090d.json new file mode 100644 index 0000000000000000000000000000000000000000..d59e99d697abe8dc928d51c3f30fbad9823749b7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/llama4_text/tiny-random/llama-4/a7d7ccbe80fcc54f090d.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/mixtral/dacorvo/Mixtral-tiny/93ac0e7f7bd685a9d58b.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/mixtral/dacorvo/Mixtral-tiny/93ac0e7f7bd685a9d58b.json new file mode 100644 index 0000000000000000000000000000000000000000..465ab5f295b0d998b3191d7b84c7536b64e1f0a9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/mixtral/dacorvo/Mixtral-tiny/93ac0e7f7bd685a9d58b.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/phi3/microsoft/Phi-3.5-mini-instruct/8cefa5ff9083418aa040.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/phi3/microsoft/Phi-3.5-mini-instruct/8cefa5ff9083418aa040.json new file mode 100644 index 0000000000000000000000000000000000000000..1c26f078f1a864d944db73933888f7035150c591 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/phi3/microsoft/Phi-3.5-mini-instruct/8cefa5ff9083418aa040.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/phi3/yujiepan/phi-4-tiny-random/4925f3a03aeddc47881d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/phi3/yujiepan/phi-4-tiny-random/4925f3a03aeddc47881d.json new file mode 100644 index 0000000000000000000000000000000000000000..e6f068b0fe52d703e91b6f364c1905a19bf10ec7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/phi3/yujiepan/phi-4-tiny-random/4925f3a03aeddc47881d.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen2/Qwen/Qwen2.5-0.5B/5ec0bcb03ab4e2ad844c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen2/Qwen/Qwen2.5-0.5B/5ec0bcb03ab4e2ad844c.json new file mode 100644 index 0000000000000000000000000000000000000000..8d2e6bed31d2964aa423f3f240a5d43c8000ba3f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen2/Qwen/Qwen2.5-0.5B/5ec0bcb03ab4e2ad844c.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/a037d3d5582033b493b7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/a037d3d5582033b493b7.json new file mode 100644 index 0000000000000000000000000000000000000000..cf30f4ead11eeb095d64647a4dfb4be775176122 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/a037d3d5582033b493b7.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/2c2bad80f80ab55a2d84.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/2c2bad80f80ab55a2d84.json new file mode 100644 index 0000000000000000000000000000000000000000..49dd4673430e4954c3083b91bc4d6e20b27d8952 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/2c2bad80f80ab55a2d84.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/ada37737b07aadde24a9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/ada37737b07aadde24a9.json new file mode 100644 index 0000000000000000000000000000000000000000..acf5254d099cc5120918c8ffcd54a4624276f2be --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/ada37737b07aadde24a9.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/7144c43faca4fea18b01.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/7144c43faca4fea18b01.json new file mode 100644 index 0000000000000000000000000000000000000000..ab7da55f773489bc710b80303fe842535894d180 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/smollm3/HuggingFaceTB/SmolLM3-3B/7144c43faca4fea18b01.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..49995cdb95ab76a23219bcb74919ee7d900cffa5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_f86db28f-8362-4ab3-bca1-87fb7357b2a1/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4bb45842ea0894b69b62a879499055cb6a5cb9ae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be2fc82d01eae877238b23e1fa62d9bd75d648e5ffdf58b8cf01e4f672c3e27e +size 11280 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6832dc4c2c3de1baa1c067ebf3fa0985b6bc6830 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03bae6db1a714232a6d34ce6ceeec0ed6626740f78e3af4b5003cc96596430c5 +size 1444864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..65c4760ed36e479d3904c35e005cc961a942054a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_04d46d63afe5b4e1584b+6bc3ae29/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afe8ea4bd2198e83af2c041364b064a638c2aea8ab17aa34c467e4bb7c5b4724 +size 1447845 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff index 9508e8a34fc62e57099455cdc226e6272b19164d..e9789a59e4c0ee71914966baa7ac0b679d670cc2 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff index d6da0e362e48ffea00abdb4d30e54e7e51283c4e..86ff6ed91b2d3c2dcdbac2d6ec7dee55c3952a66 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff index c854476aa5fa986694218636f16856ef933a95d3..d0e3464928b50d8b16e44a133151619d505fd8b5 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff index 800d2c5caee880b7859f5b4dc41b103a46839b08..296a9fa95e288fa6ccc400521f4a723455aa31fd 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff index 441b51df057c6331766a28b6a8961e410ab17eec..eaeed6492e7867e1839fb9b453d39e532d18ff58 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff index 58e7103b304796053410e5ef71ef7fd75efce55f..34efee8511893591557ec9a690d1acea7d2f993a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff index cf59fd0993cd275e02b4f0c0a79b5a1b2b3c6b0c..4a4cb8098ab0f2255f7ea345601bae5e32420c4f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff index ef91b3d153abdcc02dba247ab388fc7b0d53c5f7..7d3e6749c70825392c18704013b153ea78071630 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff index fab1ca7a812d28e79621353e90dd0f3ed6d660e0..8601a6f2f2e98e259897e68a5cc3eac84711ce8d 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff index 33c465e1728d4a379ca504efda6be3c01c3e1b21..334b438de5de1955b3d1872975a8eecf2116b32e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff index 3f11b258b6b7bde4498dea1c942d3fb4363f416b..c4d3f7963be47e62d8781dd7a849e236f070315a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_12487216553200321032+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff index 83ab4f79e57a37172e554723c1161d2e743fb85e..4c937d2052138a5983a2cda55a9e86216337b207 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13085549342645515693+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff index 301e50a9843c97d8763dda8e92774c6dbeb993dd..595161a6c0e421153a6ad315f5e74cb20c5ceae1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff index 213a474d7ee696e9c189722df6bd1b389eb1c697..37e1e68ec25001fbba24a7b54817ad13ceaef52d 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff index 4453e9b7f7663f30f88a0f9451c5f64c91f96cd6..b762f96395dc5133186777d5bcf1f649b539314a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff index b02d6d450fd1bacdc3bbff8dcd51fc76213ce572..be71abc5b2af4d51a2cca812e7d77f7465ef54df 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff index d33a324ce9d772839e0be49dd4cb17c200c6f08b..851701bc9a5acf80063661e38d79406894b92389 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff index 87f700282d391f27dfbe9d03a587e817df7ece84..8966014d23a854aaeef5b4f33a25e24176d056eb 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff index 30dd50007a8c37fbae6dbd88dd641566ceb72ed4..ca215f441f90234c679c1fed91779fe51d65cbed 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff index 78b006126c1bf34dc75df3bbfd297d3917f0a0dd..2bc6c069b0d09bcf4bbb6c8c9f9eeeb65ecf744a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff index 8e06e6790e7d7ce4acdf491069d0d8df5189a45e..77be1411663bcfdd549e7c037911d70d1a56224e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff index f40d603768d268d7fcf76682891c6a121a2f46fb..e8c370aaa227ae3a2dea2620ccf9bbfecc5861f2 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff index 853b1571074af261798a00a798d6db4180d1b7c8..4f9615b812fb406acc281841cceca42a7811147e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15442663025941492357+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff index 5450eb6eb141c9b8c39d377c17c4395aa8c496f8..44b2b245c9f0dfc5d1f810da77eb1921fa1fc5ca 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff index 1376f82f7bc866c1f499d2ec457a2a8595c7d909..0bca96ee2987fee44775372e44e083312e049806 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15564425602929126510+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff index 49c618e53565b88ebca548e19b18e1638105ff68..7bbe162a67d08be327d898d595175c71f35fe346 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15589838460896944293+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff index 1f62adc6fc87e643fbdc853246e2d490ee4945ef..60501b6604111e4ac4c731d9c16d417c3aeeb11d 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15654572849171857535+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff index ae0d3100f79c540eed0c5315d0cacac92241ab00..5ca02254c407c414893b34c964be44d4be730749 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15767383571209512795+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff index 55964410083f3f5344a9bd5444d413e54a2583e6..c6df0dc8839b88ecb47377c320e8241d495b51e6 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15888836045088309511+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff index 7eda7cd45088b22a1768686bcae452da27501f85..241f0fdac161371d8e58d4c8886f85e3a0654fec 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15929451261464042997+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff index f1360538ecacdbbcb33dbdd9e75ccf13f983efc7..30dc3e68dc05e0d39ce1ebeb182e7379f3f81f29 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16016505958416521648+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff index fe43a8916f4a77176aa93b64a5a03d05f6861375..9557933248a98273bc2b3b0959945005044bf6ec 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16053163479112702088+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff index 12ba6cb5d5ade940852df0809979207999788cb7..700b4affc2a08f5acf9ab3fd4f8b30bbf89605e3 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16204387852795926216+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff index 49214d41a9a59fad1f1870427d43f2ff3b750a4e..6932b3d1dad2966d4553f46565ec006dd2293333 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1656719109221189948+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff index aa16378ed809a6e21daa6dc69ff7ae1a71c79152..c707d9186cb6198ed6e0d1d7c440c70d189741ba 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16599571375348449904+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff index 090af26ab03a932e72be1f1485591057b8df2fc1..cbaa4dfb23771c1b311345b54ea2d1650eecbd05 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1663757798483801648+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff index f5632f882683c14418ab225740660d4fa73ab083..25f529dd50c8f7fbce3997ff5d0adf4839290209 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_16738296820980389103+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff index 77dea5385338ed32298998c67a08e3e143a1c1e7..a9b19df0b791ae3e2b5b08db248f7d8b8dd70e8c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1696825468766062114+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff index efca1d4ef78543dc4f5b526da4612dd6899e0e98..3c2697826e5866874d51c96a23b00bce4155e8c3 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff index 28046d910e124ef596ebc9b788f21226d730b3fe..4678a1ea66fa08f8fb02d59ae48037e72c6c59bf 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17781596253725927902+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff index 18ae5874f88807c40a1ae1415a3df09ccdb6040f..724bf5e29690aa7b5ecad98475fd247d83acf76f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1795740353831178306+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff index f8d8937035d1742377f047782a313fc887d1516c..629cc0c639973a0f11a785c9d9262dfba4ed9dd1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1800832390737682969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff index 7cb65a697739b9ca150d720e08996dc16530eae8..226a43d72d0bac15510af7273654f2f7da7d11d1 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1805481651134498710+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff index e3ce0cbf341deaa7cb2bede66e170244b89fa002..1476bd80686423dd4b14d1975b7df3e9b270f399 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1805728312057401221+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff index a8fc9bed903623898ee5445bcb7162cbd97fef34..43dde8453458a49287bf62b672e16845f33284f6 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18070749384531238695+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff index 8b0beb6fb6ec1e4f46c06dcbfa3101e252040299..79d4ff5201c708fa611d76179355c85b26b15b48 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18134517905922687964+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff index 1f31c71f95a7846facb80d1913a5bc33f711b79d..9408d118620047e9136b7cefdea040fc0536fc93 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_18207475553554060390+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff index 6ada109e41fa7214885e6a539579416cad2fa94d..94b6e32380d8c589a640ef962825b23cfe9b4d3b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2157613318347839507+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff index 120ebfd47574b7ca2701d34ca3e073b900d85013..74c5ee991ed85c376867ce3d738669ceb3603d87 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2203200257461345827+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff index a241c77592f99fb2048d353a24ac6d729b6fc512..06e6a1927210aa2d1ac2234e7f8890a5d614fa61 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2410426589418994260+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff index bed75d83f1eed3b67cd069fc93a456311a9dcc99..7b6593279db3257fdbff0454625d2edb67e2b808 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2426414314187505427+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff index 7a2908bb064ffea991007c402b0ade9fc95dc9f2..643c30abad687e46db238a645a8f6d48ea691792 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2576759111807165188+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff index ca7036c0a361ca30f22cbfc2c2f4243db6d6eb81..8fd8f2a159baa32f270b50272f9df63a22250532 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2650447136112456251+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff index f929bfe05465ba3b72f1ac495e23c3314ffb7c89..8a5713a86636963fcef65046a59f35c1ec83e620 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2682953264353234433+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff index 7a0f593f9ca78555fe54747773ed2b6b8599bfa7..296b2f96349d616c85e5086bd48e5a19e668012a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2699280712748688265+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff index eff2fa6ecad67cba262cffa4cc5532e7faf15afb..0bf826d921cf8376f7464dd3274254513151eb67 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_2811495285170804454+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff index 9a2d5225a21e5a7ef586c38529cc7ef431ccac93..9cb0ccd09d96653d2da66e716f7da84aa984d37e 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2ce3d3065088729c7d7f+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f6e66e11feab45ba8eea367642f504ac0e73ad6de1686a12391e8d229790ef2 +oid sha256:e03d3e49fe507adaa3dc96438f67c7d1c36536121056d101abc75998b0e42c6e size 3769344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff index d33262157baa7a46d8b54efeccbb57b5d0bd7795..e2a3c17c7c41ee14c08189a37ddf0cb20eda970c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_302950992539679003+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8903bfb2fb424739cad781a9bd3df5d1179f92e8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_92a9c4c3-274d-4b5a-9666-ac67a38bc8d5/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b7f9f80de01bab3afae30f106439a7600609730b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a4f4cdcd60e648ee1c6f483fd7d775daeec9d14a980ac23a3446e8df4ef38a3 +size 8979 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a9bf2e00422ff05e37bfd7b1fb324601b57f4da4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abfb682c78411c63c1c897f5a74768861a7fd1400761ffeb79029b3a0f68cf6f +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b5f67b67ead9e67a8dff9477701e1dd763eee1e1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_311c80d1ee4083869ef9+35105abb/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a654bcc6382cc2c562c25bbaddfdad0b78ba07fb487d0c818fba265ad6146294 +size 249608 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff index 2ecc1ac7cd6cefc8ce5d93f2c9c508cad7ba93c8..b24eb479c1705929b6728ed9d91ef99fd5b29e80 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4013984518400530928+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff index 709a28b9dad3c3a2187d798ff86d9c806fbf1c8b..5dbe6d69fa0b258f77f09a6a3266ffe901a529cb 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4052264821257342969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff index d915a4988e3b4527cd376bf8bb021a8ec0c2daa2..1e47836fbe8deb13d9d147b899897e057f74e72a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4166695227062860792+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff index 751fa9991bca215e784ee48cfc0372759aa093b1..60736ac51185d8e15cda5cbed7085cf52c4fa7e8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4232557591140652245+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff index e175bf0bbb9650a53243ed90f27273adff3c8a15..de5b2c94465bcdbee55dee933086a0a78e995ca0 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4606491920565381523+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff index 293e45c5f9203889a538eaa0415370dde771795a..cd3a926f17fd9d418691b33e21d091e421ac451b 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4608333438381222049+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff index db277c990cef3a3df37e5ad67e54735b49778c19..c1148588ed08372740c4fedd8d339baa21417e16 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4683016291147007110+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff index fb856cad6f84b7874131a06251d19dad00a0ec89..1de00cf4fb08e43edfb8315bb0f05ef7fa24b6c9 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4816422433712790690+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff index d7cbe0975b9b8c7ff0166a606c2fa0ac51438afa..23bc6fc2c55a03f9f38aa65deda4f26a1baa355e 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4842952601010372434+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff index 176dde029846d9b3de418b9644da353733db0315..7a0f61c20ef7eca777a97e54996841abef6b7d7c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_4939721357859779936+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff index 7f13550f3bacede3e9bf50e7064ba47c2bdf79b5..dbae5090a11a5e070b212f754c1ad5eac3875dd0 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5397223456911199516+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff index 01cb9366f28395659d8bb73d9e303fc8c0bc3bb6..794c42a2e44edf52789df265a62982aaff40cf4c 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5724544066416780383+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff index d4034c6b3c7fdfb87523947823bd503ee952dc66..721a25060e415b0e6173d3f40bfb1be4743acf63 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_5765916484866650909+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..53280b9ad50a9bd6b913cf2594d811f8a2cbbb65 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_60ff5e33-4638-4f51-91b7-3035256b0013/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..618358c2b96cde9d0fdd8a5363f4e3756584720d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a756345a0e47699f8c540a039458d52f4072f93c3d432124a626815c5383b0 +size 14480 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..778d65e779218be3db1ae892ef722b28d99c1794 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7091e7cc6a44d95f1ca785b3030f55c958ae0093fcacb39eb0f407e19e438301 +size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bbc1cba11f449e67d502f6392b30b598a503d4f6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_57eeb604bf270dafbec4+9e6e935c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef51c221168e540a5c904f42e06b0aa2c486a7011157ff132d5d0ea9b9f94ef4 +size 272962 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff index 3aa1ba8be9c194978379690925294a78ffb81015..d66370037712a991b2a46367a1a7654728a896ac 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6368409506294683105+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff index 938ba38804a1d5b44314e7a5de0e64dff8cbb518..e39004bf1d17757e3ea7934fb51295246d25835a 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6375780938374445148+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff index 7dfbb619fda32ce4db304fada6d538c45e2e4695..18b27f08fe0d4bfe457288ee66f003c7c19f5ef6 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6417297157123190451+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff index 5dd3fbf696011303eb21ffb442effb79fac7916e..cf8cbe139b765bfe83ba5a027da462d317606f77 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_665145310648895012+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff index 52c076e207dcff870fb53b56fdbbe1f1679028f5..a11174d75eeaab63dcfc0aa3005a1bc29d2c43a8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_6711765444274762085+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff index d5ffaa46b110613ae2c8ab69030f795042db9c55..e656f4e4265213997c872f7ca32c14e3b8cd0d0f 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_7049343467335223052+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff index 933ffa85c5c5288cf79b98acf26df0d1076ff832..f0dac0ff743c5b9ac1225cafe3594157ddb837c6 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_7877866862268499659+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff index 784a22884fad487f91192c8672eeaabe9aece179..9e214e70a4b8ede66721813f460ccbc1da6a8acf 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_793722132614349680+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e0a9df99176ddaf21fc6da0204d274dc49da1d1b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_afe49a9c-3be7-4600-9ae6-f97b15b8db9a/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..95446bb55b323cd9f32a0f56362c2561570bf436 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1df9a2c1e36b41b938d2c18bfc65c09e2b28db32fca1985277be65be7a2a3e72 +size 103424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2d74234cba23fb5bffe6b061ffe25852595b3733 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_812cbf042a260062f4b9+d1b57957/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:995894face8b93f6b06427a913ecbfbe25cda0583a6e53c1b7b647af6f393c76 +size 104320 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff index c5583a2f3b0b1f63787de39a19feabc0763056cb..72c4eaadfec117e1cdbb4b36f8cf178bc49901ae 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8142158350842512240+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff index d924a092e87295963fea866063c0c83bbc6b386b..98ba6c8fdd74cbacbce84d7662c086d48d1b4bd9 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8146958338478347620+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff index 9908d9e86dd519923dd2841bdd0ece7792f1a348..48f571b56e6b5104aff852e20952e8d1bc32d5c6 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8245165830758578911+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff index 372e0075c77341498f54ac6f95361ec3e05292ce..d9ab8ea191ba1dc450282acbfbc5760aee939c39 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8679096817551502409+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff index 9345bcad8f884d2e62805b047e99f49d2c65326e..717e953363b0538282e9d0a5e04cf17e198b0547 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_8882592820567938515+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff index 6e633a1ef0a4fbee897781702da29cab976dc890..0ce1c749d27ebd45851c37c053703673a1de59a0 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e38b003badbf415a083a787b50e1b6efb551bf74c6bd0799bb5490041c3d2a50 +oid sha256:8fc97cb51087dc7febf855fce0053431c5fc13dbea44d8bc3f8079481a790ae1 size 42322944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff index 5bd4eddfe3a8553a27827ec4b71312c37be25f6f..aade92913bce7023bf6943f2264ba30a8d27aca8 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9168843780052024308+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff index 4cc7a30febad3b1c7a2cc0b6b18a18371d79f3d4..43cc8b2a2f0387f5487678f7deda9aaf7ded4cf9 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9527558113976496538+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff index a0b22242aa49fd79ff78f502671c003998ec35bf..6ce279f4b2a8752a5e1deecf9fc0754f82660a58 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9533302140457976101+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff index b165b8a89257ab7770bb82933b45cdb54905da81..467f4809ecfbcffe5ed0c489c9a47dd5e19c168d 100644 Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_9770544877851564228+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..18b07bc59f99c604f8e43e77fb2dd2d407b0f0ac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_60fb3012-6a8f-413b-a855-c6d90c5c59e2/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e2473c54cffac949ecf31d3b6ec636c290545b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a29d4cf27aec9af90e53324eff9ee69e033679482f4016a9dd3e5416c7c68a +size 1584 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..86334547cf529144f30438938fdd617420306649 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_b49a402e0adda22faf30+f0a61cfe/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1846006439f18f36f7f877a820bc62878c2ea2ac --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_f7d9d114-00f8-4fda-99e2-e3fe8b54a493/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0e51cb39a846e4f58cfe3d5ec50ea34e7babca50 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c233e7013daa344cb368079ec790fbf10f597bc7f8708f8c91ff6684cdc1cd2e +size 29412 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8af1c1edee81cd10a47a3195193f689c024ad917 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c94c23b4b50de785af0733e70ebca67d543f1264dc5608209b36fbdb94b9aeeb +size 328704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a497a4529282682a8ef9d5f69aad2fa8b8f81ce7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_beeee808ee2c5817dadb+41bea4e3/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9ba7f412c9269ac8ea671f2b4f505df362d5dd9bb53af654c0d1ded19bae78 +size 334452 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ec0cc833c85f767d699754cca4eb05793d75f325 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_95746504-ea96-4d87-8101-acb8b2a928ac/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6da2cba66863fc471486cc2a5da22b16c00d6480 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993f99d2d09d34bd152af4c7f96a1e6e1d8788e1cd4aefb845c601d2f4d5fcfb +size 1931 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f53c3ab0dbbb75a76bb1c8b4f2fc537e413d2862 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:661fef42d1067e344011df403559fd85e66cce503d4cadc843a28bffe08fc3a0 +size 134144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..87379244eeaaa7128d27e028b48ab92e999c6234 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c01f2cdb2ffaf3dd482e+0c4991c2/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9692eaa0161395c68719cd02c7a9eb5b68a6c091c0a8a3f924da43a7f6b21ac +size 136222 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff index 022a97092443e5d2efffd1062e22fe7f976e6ef1..499b183678a9ae2436f97835fd7f9cb60e0bd557 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:45a06f35e03402edecb9a771aed00c4a2d36c9f6c730c7d9ec9513e6e8e80781 +oid sha256:9af0746ca99078f3af58c368a4b1873b631321c0f21fa64be6b2600245267d59 size 1547264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo index df2611a0c98189494b2be2e9c1da353dc56d160e..606d4c6d3bb148fa49d3fbdb4bfe623d4b8bbf83 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f19ecf2e2bcc3a4b1bd9+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:322e5f93cacc917d7fac645fa5e1dfbf8b36979092177c578f94a5d190cd9f56 +oid sha256:d654248dbe811f597847c9e197cc3763e3a76582823311abafc95bf4217697ab size 1703455 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff index 62acb0cdd5ec6622f1af59121152a19c7df93958..6bd46b1c7f6772734d1f401191b7304c93a67f60 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa474ad6339caa043cca7c92183cd0244c38b0db43ac21ee7fbb953ddab4a989 +oid sha256:2eee692c9668454a7cf6bc3b2208db1a9a9a246dd4fd8dac40cb25385a7d5851 size 2100224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo index 9eb6eb99ea28ca0dd284fae403aa2e150ce92ef5..c953dcbf75124b5779d24a59634f6656288e1ab1 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fde2b1111ad10bd7504e+a02c3a36/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99ef0512f48cc6fd21c69a7a42107debebf16a38c3627f9190e0e652ec2c4257 +oid sha256:48ef30bb5730ed4954a301fe583b8ab8255ed902886cc0e84a5b93c5b5b59fc2 size 2174311