diff --git a/.gitattributes b/.gitattributes index 67ccf8f670fcbe016bdd5cea850867fc779ec0a2..40c5197c656967ec1c987fd82702a0e2a7746a04 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2933,3 +2933,12 @@ neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json new file mode 100644 index 0000000000000000000000000000000000000000..d275807cdbfb870a0be53266909b4296b27a64a9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json new file mode 100644 index 0000000000000000000000000000000000000000..00eb3567442b2cfd634e41a17799aaecf7ea764c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json new file mode 100644 index 0000000000000000000000000000000000000000..e570ab13e6bb7e702a8883bdb57fbd286231eeb9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json new file mode 100644 index 0000000000000000000000000000000000000000..e2199f4bcd5afceef8b436b0604458d7ffa940d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json new file mode 100644 index 0000000000000000000000000000000000000000..d06d7489fd2f02e68ccc3db3bc42dc9aab412dab --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json new file mode 100644 index 0000000000000000000000000000000000000000..01bdfd8810bccfc48298fe2d8748a300be7808b5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json new file mode 100644 index 0000000000000000000000000000000000000000..a264242c960e5343432f086a620fa49f1847e6ec --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json new file mode 100644 index 0000000000000000000000000000000000000000..1a16723daefaa5f854b0ffaf693f91336bf4590e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json new file mode 100644 index 0000000000000000000000000000000000000000..985951ec31d99688d6a9f2f3088913292858a408 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json new file mode 100644 index 0000000000000000000000000000000000000000..b37c012b8de4311564d312e0db455b81aa1ce41c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json new file mode 100644 index 0000000000000000000000000000000000000000..4cef32eafadc72598528fee29ffc8bd1d3ed53e4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json new file mode 100644 index 0000000000000000000000000000000000000000..bc22200f7902327a730a4cdba88cd1463733255a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json new file mode 100644 index 0000000000000000000000000000000000000000..5d8ded2e0cadb34394415461281d23f65bad2f91 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json new file mode 100644 index 0000000000000000000000000000000000000000..51ecee0d74bf7f4205108309ea56c4e5a4ac9d3d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json new file mode 100644 index 0000000000000000000000000000000000000000..0b39e09fdd981bd22b3a2220391784a1cc253c19 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json new file mode 100644 index 0000000000000000000000000000000000000000..67faa73f3256763a9e6d2147fd2c2b98966f7c42 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json new file mode 100644 index 0000000000000000000000000000000000000000..625d08dca70ac1b81a75b7fef2f459399a6f6532 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json new file mode 100644 index 0000000000000000000000000000000000000000..45276a6595cd3b61f50b7c8e45f6b662140c0c4d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json new file mode 100644 index 0000000000000000000000000000000000000000..f8e969e38469674a00f91fb87a1df44c82526340 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2cb9abfb8e1ee1c3a50d91035b5819017ac4f072 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cce13c2c8d6e621050c4f6d2a392fb09e41dd7942671696a8fe6a6738dfe181 +size 10362 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8a131482b172f4d5bcdd500e7c733b18c4dab230 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c1fb56e7082598b8366ecc1e26e3508fca8b84f7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac7b363ad14391ea6e90f95eff1d88a5146cc28af5699413964e7fc4febe0b2 +size 55929 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..26c75f37364b760d300715130cda08963aed7d99 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e85e7c33a12ba1aded0285eb00c9631d44713a2e29c7e17268dcf5109dce34 +size 2366464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f4e89d7da3b61b35b25092ea0c2d9f009c71afa8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4acd67a4467ef3a0cb39e5f4ff6d7234573efbd930e665c2631b471c1a2ab93 +size 2376747 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cee68abee60c0300cdc0f7363612aae53fb136c5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3de47ddfe9c2dd2dd851b262ed8e72f81d7bd5088bd4c866e33eccb1235c731 +size 88686 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d714cc233ea87d3373db30b3e4342fb76028267e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670eb5f6e43393459448f206825777f42128ddb80a24d3859cee614b17f37054 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2d298b31a325d9db281b468fc86d636891ed5277 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79163920afe05ec7bb4285c4a6cc84559fbbf3b4fde58c15eccb5c8001b019c +size 247153 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3ca90971c7cb3ce9400fca9bcac1cb70f8bc0979 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef04c56dfa88ba3744238690dd27414699ab135700c99f5f366574da1704460 +size 88686 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..07fa907695665cdeecb55ac97dbf2ce42d5b2634 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b96ecdcfc5ff6f5c8bb2c6c748d4dc4fea4c4013f221ee4cbce75c1367605cc +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3e7bd1f1570a3261c3e5a4f83bfbb316c9dd5358 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a96e164632fabab7abd4e40b82d9ffddbb90bbb3261ddde437c1e7bebddcb01 +size 247153 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d71f3b29f54c7c04856a9c56a09ce9e0b18d7373 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba65d5335646539f3aa28ede06c54054cea89d33efa6900bb95e22d5e9aeb0f +size 53519 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..61226b4a0a42b21eb979000d4f07fd00ffa9323b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b3c90fdb13ea980144897c0236d2d8432d49c31695db60316c9665c66bcd39 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..170e46b40ac0b56c809e91059c3be6e81be09164 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0116aa51c873801bce62eab5b30eedbeb02a12112baaecad8ff8960d50e370f +size 10013 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..02f2a5b21b9a8d34f2f484c9e3b6eadd815239f4 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..22dcf0f8c79ea418cd5ca2568a4ae6f6315ccc56 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8919938a040060bd5077eef2dfa9d7707e8483b155a3cc2449f75b6e5ccf99f7 +size 10362 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..73a459f32b950bf6b4ddb88c31cdf7d77951cc00 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..61f4fa8fa48ba12cb1cb5e77fb6d5ce569a19424 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d3490ad3f154cf17e30d467065e0d249bfda6d6ad064598f06cefb0c4ca4072 +size 87293 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a19bcbbb0e4b6ec579b7ed7dc97048bff8e8b442 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1276d19ea2012e1c74fbd66b9b7fb2c4f116c5d75822dc9365bda22b1d6bac7e +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..351d209f0561d87d2ac91522986a3bc10e886f94 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b4aeec679a69511a8128b7836f1aa52b284c380397507732dcacb0e8d540385 +size 87293 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..203864cade35b10c1fcd74c241aa4e5960548946 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:753350d9678f5d9a3a7a205fed3e4aa3f6a0cb0273310fc36b07edf138c09299 +size 246784