diff --git a/.gitattributes b/.gitattributes index 2f70dde6b24da3c6f02c89c2ba8af69487dc4709..645a19f78129dd2db8b47efa1b3a6185c32d230c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7003,3 +7003,26 @@ neuronxcc-2.21.33363.0+82129205/MODULE_b590c13d190cdf017ae9+6170d8e1/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_c9c3ee67891b8431ed41+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_c9c3ee67891b8431ed41+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_565345fb157ec98e94fb+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/d2900659f0c3438aa013.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/d2900659f0c3438aa013.json new file mode 100644 index 0000000000000000000000000000000000000000..4c75a90f39ef12dcbc55064c43db84818125d794 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/d2900659f0c3438aa013.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/7fde6971cc66d8965351.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/7fde6971cc66d8965351.json new file mode 100644 index 0000000000000000000000000000000000000000..fdad245a9f75b0a32510e2050a7bcbe357a35af4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/7fde6971cc66d8965351.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/af2d330d6e4b2cf43703.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/af2d330d6e4b2cf43703.json new file mode 100644 index 0000000000000000000000000000000000000000..cfb064bfd57dbe0094a334e0e4f2390a2dccd1ef --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/af2d330d6e4b2cf43703.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/9a1461765dbc24d49ac7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/9a1461765dbc24d49ac7.json new file mode 100644 index 0000000000000000000000000000000000000000..c9ce2349cc64cf11d749feb64ef458f4677e6e5a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/9a1461765dbc24d49ac7.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/8adf13bbd08271a796c8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/8adf13bbd08271a796c8.json new file mode 100644 index 0000000000000000000000000000000000000000..a132a2498b087f1c7ccca556a74a7d23482ac95f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/8adf13bbd08271a796c8.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/ee9ffbe1ac4a7ad2ee51.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/ee9ffbe1ac4a7ad2ee51.json new file mode 100644 index 0000000000000000000000000000000000000000..05b1c9e977e9ce897fe5a98011d3b19f37bb938b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/ee9ffbe1ac4a7ad2ee51.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/a4bf640c57a2ab4c037e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/a4bf640c57a2ab4c037e.json new file mode 100644 index 0000000000000000000000000000000000000000..4bf6d5f0262aec2537fdd0ed394528e794a49135 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/a4bf640c57a2ab4c037e.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/16b8e47a972c18b057c1.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/16b8e47a972c18b057c1.json new file mode 100644 index 0000000000000000000000000000000000000000..3408e49ff28bf73d02316047a752a2665c2f0917 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/16b8e47a972c18b057c1.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b52ec561f1c1fa96cdd0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b52ec561f1c1fa96cdd0.json new file mode 100644 index 0000000000000000000000000000000000000000..3ee69b335e80ddc18519ffb89bdd5b184835f702 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b52ec561f1c1fa96cdd0.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/646e661265ff77f78ecc.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/646e661265ff77f78ecc.json new file mode 100644 index 0000000000000000000000000000000000000000..27b0a4d2187e411f24170d67551dc4c7c08dfbc0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/646e661265ff77f78ecc.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/gemma3_text/unsloth/gemma-3-270m-it/7fde6971cc66d8965351.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/gemma3_text/unsloth/gemma-3-270m-it/7fde6971cc66d8965351.json new file mode 100644 index 0000000000000000000000000000000000000000..fdad245a9f75b0a32510e2050a7bcbe357a35af4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/gemma3_text/unsloth/gemma-3-270m-it/7fde6971cc66d8965351.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/646e661265ff77f78ecc.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/646e661265ff77f78ecc.json new file mode 100644 index 0000000000000000000000000000000000000000..27b0a4d2187e411f24170d67551dc4c7c08dfbc0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/646e661265ff77f78ecc.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/llamafactory/tiny-random-Llama-3/9a1461765dbc24d49ac7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/llamafactory/tiny-random-Llama-3/9a1461765dbc24d49ac7.json new file mode 100644 index 0000000000000000000000000000000000000000..c9ce2349cc64cf11d749feb64ef458f4677e6e5a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/llamafactory/tiny-random-Llama-3/9a1461765dbc24d49ac7.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/unsloth/Llama-3.2-1B-Instruct/b52ec561f1c1fa96cdd0.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/unsloth/Llama-3.2-1B-Instruct/b52ec561f1c1fa96cdd0.json new file mode 100644 index 0000000000000000000000000000000000000000..3ee69b335e80ddc18519ffb89bdd5b184835f702 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama/unsloth/Llama-3.2-1B-Instruct/b52ec561f1c1fa96cdd0.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama4_text/tiny-random/llama-4/16b8e47a972c18b057c1.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama4_text/tiny-random/llama-4/16b8e47a972c18b057c1.json new file mode 100644 index 0000000000000000000000000000000000000000..3408e49ff28bf73d02316047a752a2665c2f0917 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/llama4_text/tiny-random/llama-4/16b8e47a972c18b057c1.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/mixtral/dacorvo/Mixtral-tiny/af2d330d6e4b2cf43703.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/mixtral/dacorvo/Mixtral-tiny/af2d330d6e4b2cf43703.json new file mode 100644 index 0000000000000000000000000000000000000000..cfb064bfd57dbe0094a334e0e4f2390a2dccd1ef --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/mixtral/dacorvo/Mixtral-tiny/af2d330d6e4b2cf43703.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/phi3/yujiepan/phi-4-tiny-random/d2900659f0c3438aa013.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/phi3/yujiepan/phi-4-tiny-random/d2900659f0c3438aa013.json new file mode 100644 index 0000000000000000000000000000000000000000..4c75a90f39ef12dcbc55064c43db84818125d794 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/phi3/yujiepan/phi-4-tiny-random/d2900659f0c3438aa013.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/ee9ffbe1ac4a7ad2ee51.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/ee9ffbe1ac4a7ad2ee51.json new file mode 100644 index 0000000000000000000000000000000000000000..05b1c9e977e9ce897fe5a98011d3b19f37bb938b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/ee9ffbe1ac4a7ad2ee51.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/a4bf640c57a2ab4c037e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/a4bf640c57a2ab4c037e.json new file mode 100644 index 0000000000000000000000000000000000000000..4bf6d5f0262aec2537fdd0ed394528e794a49135 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/a4bf640c57a2ab4c037e.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/8adf13bbd08271a796c8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/8adf13bbd08271a796c8.json new file mode 100644 index 0000000000000000000000000000000000000000..a132a2498b087f1c7ccca556a74a7d23482ac95f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/8adf13bbd08271a796c8.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5c065632eacefd50fff5fd82d84bc7edb4259bb3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0fe8ae6d367e99eb5a27313b6684b26cc8bdc2ffee3d4a8bf5a5251c44beafe +size 95295 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6499cddf3b9f134182674a91a038c79144f5e193 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce71319ab0b4811a8a9085e322f9e9b9529e89b9905a857e3f891fa5fa916041 +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..60e2be4d779fceb9fe37b77ca85e0b1e26dac416 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0d92bb2b1fa1b9d3a3d0+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a54f900722375339e59690cf409bd8b46face5fa43edf8a67934ad41b283d3b3 +size 288898 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..67496bebbba19b70a29472fb4417e847a688796d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c757db75830e5ca672f8644b5583851422464ea0c30bbb38f612bb55427b554 +size 680451 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4c1283b43f5b831c390d41d22ce88cabf2dc9929 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14fd4ec0d1dd57153a23287071f616db83c0660e597a2aa7ee9b9ca9436bfdad +size 543744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9ea70c435e5a73a51c2da8c7bef0ba537eadb398 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_142e7dededa7227b3e62+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ee9c92eab70cf570b30720938093022dfd3ceca77479abd878b4078318d07cc +size 563245 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6aa95fd9f9fa7ab52a844f9ac7d705bd79eeef91 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dbba0a1af2a53f1fb42fe1ace2a91f1ded9c5df6592cd34424abaaf8b82bde9 +size 84950 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..77dc77e05c4378df5034a62bb4a37f50c8c1ff35 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135c3b437ec9ed62dda890966116857c44f95358cc44e5a7c436a158ff5d20d0 +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5720357e1bc488d5233debfb7812a355f42b46cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_15c98fdcb7e9199cea41+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97b50ecf8ea86471998cc1c80b24874535b449cdb9408578b65b5c4fc1298089 +size 254967 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ae7e0c6790082c43cd145aae8cc7e3cc89e15ca8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/speculation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..406e3141634702eff5a177f4b9d7b387b98c9cfd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1aed48d406d79d5d3e739ae6fa23059cd3a200ff97ed068e452cf2aded1bd09 +size 453530 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..988a9dd42f43f7768949d7abd68f2fa59fb88898 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2dd5e80c893e94b8c53c+677eeb9d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec74b05853ca38742f004b3327e17e41e436de1c7f28d415e914459d99ed1491 +size 3984384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f36e588b625c7ea6972cf0d78c32ff610d659b5b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5909b61d8f3268e4bc41b2411af6e4d216f5f50670988c4e9e320b4b57e74e30 +size 403385 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f7ffdcacb8b33e276ee133c3fce46ccbfd888653 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93bf36e95d20a0e77475315ef8d1cdc93d47cf854e650e302ec18894fdadc619 +size 2018304 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d0624f6b12ea09b7f94b12e8d68bf9f353ba6ed4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_36e9f4ac79a30a03d15c+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79d95ad2132ad479e7fda69e0d1c63a92b9d981d9479c71dc2861c5556691bbe +size 2111217 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3d7d85cd3d975162683053faa369dcc6259604af --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..03cc5a3b0a2c8744229f6ed9ab43c7ac49e26013 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1b86d2eda6afe0bf03aa1f755e61cf0930adbd115277b035a5ba71919cf994 +size 74422 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c6a05b7b27ab75ad4e57d15fba92b5ca0efd4c27 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d927a05395fc0145315368108eeb2830e201f19c925609c1d473a54278458440 +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..89ec84ecdb068532149cd1a49ac8df9829673fda --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ee7521f420173f2d5db+80d05c3f/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af695cfdf89a2a5f91a8fe5bec776c2bc952e3f98a15658b400d17d7b129904b +size 289434 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6d675be7cf7972edf83eaff87f47240aa17d3353 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8edecf22b4185a0fd12d029e8a0a761ac76ef60c9d6054b180594a1128a00f +size 82629 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cc8bc358589475a92ec2b8d040e4d7ee5c27bb64 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca1595984e251c12aabf75adbd8edf823c9b992e9e9c96ae7a3f3e2bbc25de2 +size 287744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c5ace886144eef885f0ffac3e31fa7fb8b22b243 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_47c06cec950237374a0e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c71914ebc3aeef62dfc9a83c5f4182bc4e30e670d11b40afafed4fe96796536 +size 295957 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..debc8b6fbcf604f353f829bb9077840dce51398c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb8c25985c486623ca276cb06b549b646a304ddcd155476f5b081a915e1cb92 +size 86868 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4f25123c12205f104c87d54a4b7bad879be93742 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf1aa6a99deb2e0df2a8ff73269a1d2432f9146b2d16b3e683236ab62c4f1655 +size 287744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..291b5b7fa1303891f494a9c095b68ac82352d344 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_49ea70b4a47674881bf5+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d518f777845678fba315073ff8235d608609d11a3d0c1611b2d4e90a3580e537 +size 295957 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..a7a40c022faf4beb0fb20246d9c1f9c8f32c6900 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b2d86ede422be003223376919aeba1b14b253ecd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a727f0ba6420e2e1182136fcff0af6b605200f6350b2998fecd4c79dc080fa54 +size 92677 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..11d95918b49990a0d1cdf7a88346d8f59b86c96c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e435caa65bc42ea254fe7e485b1c0ba866a28e9c16cfc31b1857fc5cebad0d +size 369664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f9891845ff20f94c61f1b3348e434dcb59f56096 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78ef14a5e66efce40c70+2dde74c7/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6a5e7fb172575522d0ee597ae759d5d51ff44b387382c3119d0ff015c31655 +size 379225 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4528f638e70168efb4bfe26185f4b64e661b0df6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec86025f20dfa2069311559fb7e5c088a60b40e9fce163d527c6e5bc6e772f8b +size 899353 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..caf2084c4a6614d0ed2fac6da07b603b92099043 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf23fbaa677ddfb6daa4afddcc2460658e0edb2a5a1e50190e54600a468803b +size 4946944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3009014b4c35daf20ea04be4f3dc350db4bbe685 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bd91d5ab041debe25d4b+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3070b48967b2395bbd40e71639e212fc0f7f09fa4716d68abcab79336a2cb5cd +size 5113468 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7df280b9d0f39a2c19324f55853a4d6d23598946 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:242ceabdaa41ac80af33c6d64e92099168480040dbbf4c9ef35846fff0e4e734 +size 572030 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2649d61b69af1ffffdf475f85b3b71041b0caf74 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:596d047c9088e8260fe472b2e1da5cffe5d1ab13f0183b85f9ab2ebbe6c6c32f +size 1086464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..46802cc38791261761fb2f40726fc9030339f257 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cf5d937b64d087a8e0ee+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b2b3a002144aa6c062780653815d9e0eae7bdb6fe8f5aa6b64324c4ae97cf6 +size 1208853 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1506bd2e25ca3c180a9d2261b6abebf087eafd48 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7831067a571b1988d3d3f814b6b7baf2e8023f98dc91a6a398eec042943e06a +size 84493 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e7ccfb79b797dbfff7c1cad4d432574e32ca946e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e84f7b890583ca31af1b8bc421ba30d1d2f845ac80c2371c8aa28f52eb93998 +size 216064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cfa3519b2b9d5f69f8239173f888ca3aef9bdeba --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fe6b8b40a8e1d622e71e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d636b6604e0f586069efbc0ce6723b497855c162a9a68aa017e38f1c54daf98e +size 224275