diff --git a/.gitattributes b/.gitattributes index 40400926a436fcfdd784bd59927e1b3cf468a8d4..457d8eba64cf8f8e64ea1aaf39b651a7b059174d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16146,3 +16146,52 @@ neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4/llama/llamafactory/tiny-random-Llama-3/c18a26167d3d544bfcff.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4/llama/llamafactory/tiny-random-Llama-3/c18a26167d3d544bfcff.json new file mode 100644 index 0000000000000000000000000000000000000000..4615e3f7e7c587250e58d0eba16622e5f4bd4fad --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4/llama/llamafactory/tiny-random-Llama-3/c18a26167d3d544bfcff.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4/llama4_text/tiny-random/llama-4/fc0515836b8f5d1f3ae2.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4/llama4_text/tiny-random/llama-4/fc0515836b8f5d1f3ae2.json new file mode 100644 index 0000000000000000000000000000000000000000..25d0999f0998dd722678381ada62b653eee1eed8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4/llama4_text/tiny-random/llama-4/fc0515836b8f5d1f3ae2.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-0.6B/97503f4e2dca15bd6721.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-0.6B/97503f4e2dca15bd6721.json new file mode 100644 index 0000000000000000000000000000000000000000..d5ba311d1346f4445e53c02831e6dd90c303dc87 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev1/qwen3/Qwen/Qwen3-0.6B/97503f4e2dca15bd6721.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.5.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/04e22cfac37c408caa51d61b493ee185fc351c2f9db86171e024b5d0ad5b25c1/484ee555a60558b25118.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/04e22cfac37c408caa51d61b493ee185fc351c2f9db86171e024b5d0ad5b25c1/484ee555a60558b25118.json new file mode 100644 index 0000000000000000000000000000000000000000..e9b9126c84ac59c9a05e9c8e7b095d6780335bd7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/04e22cfac37c408caa51d61b493ee185fc351c2f9db86171e024b5d0ad5b25c1/484ee555a60558b25118.json @@ -0,0 +1,61 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-qwen3", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-qwen3", + "checkpoint_revision": "81d6f5f5e05ed53ea8a1d19431266a486e46bbd8", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/llamafactory/tiny-random-qwen3/484ee555a60558b25118.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/llamafactory/tiny-random-qwen3/484ee555a60558b25118.json new file mode 100644 index 0000000000000000000000000000000000000000..e9b9126c84ac59c9a05e9c8e7b095d6780335bd7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/llamafactory/tiny-random-qwen3/484ee555a60558b25118.json @@ -0,0 +1,61 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-qwen3", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 36, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-qwen3", + "checkpoint_revision": "81d6f5f5e05ed53ea8a1d19431266a486e46bbd8", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.5.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..179d0957939c8908ce863b1b2a0743b1117c6ab7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dbc00ee7858cb46fdbb68cacceb3372d7470cbebf8896e39ea2af5d8efc62b9 +size 52101 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fb8e8da3360aa3275027a8f6ef6039b4c2d1ef31 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_766b26a05d7144c2d188+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e5ecd710d55ed09335de5557912db1b30262439da7e0e806a26135ecbf1e150 +size 328704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..22b260f9bfd0ad3bbd01e868e36189ea47cc9931 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dc28abc15c2cee5a25c29583cb86b7174722924e2345bdb99037bc17e6015a6 +size 843918 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..67c43f7ec6c27c3da6adc020dc82057d5dd2321d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a063aa5293cb29ba4780+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26feeebcacf0f72fffda73f862f24323f9cc45c5cfee897da8b4f415eb71ec7b +size 96881664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..05a5fcde6efdfcb1eb465238f2c5009372c68e01 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b5827ec37f20eb1a03f14b76da5b69939e3916eddeea598bfcc6b4e3825399f +size 758053 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f5454184c977ffbaf3ab8c06f0890bfe5fd1032b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88bacbb0526fdbc417797b5812937e7f06200b3fbc689b2eddb86241579a58cb +size 2417664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5ada0bcb69780b4d623926a3b05b47ed2a6b7529 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e1bdba015878a24fad9c+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aee6730afe995350e7a9eb65067244b35a8e7c710edde69449a070c803407e4 +size 2554832 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a4ca8f742f5904669f15c3b9528b51fdafb642d4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3c421871afc527b32f8ff1a4330c7ac88fb42331d3ff7e0acef5406bee1be6 +size 50068 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3fc3253dc22eda7dd3f3b29e6a8765de03f2a525 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d0ed55734988658a20a6885fd9b8a942c2b49a291fed067ed3adbcaddadb78 +size 287744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8d784aa39cc5e1ec243897e134cb6420a7f5836f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f452005788ad9d57f1bf+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baf1d33ff412bb350ae4589e582cdf57ebd28be41d4e216ab33c58f7ad744d09 +size 296665 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/414cb6d116ea07a522b5.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/414cb6d116ea07a522b5.json new file mode 100644 index 0000000000000000000000000000000000000000..763add980fa341dd586eda63ce15243b454eee66 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/414cb6d116ea07a522b5.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/cc4f4a491983cd417613.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/cc4f4a491983cd417613.json new file mode 100644 index 0000000000000000000000000000000000000000..acae871905067896f0c348957ffdd79b5d719370 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/granite/ibm-granite/granite-3.1-2b-instruct/cc4f4a491983cd417613.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "dtype": "bfloat16", + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/7cba097179296102aa62.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/7cba097179296102aa62.json new file mode 100644 index 0000000000000000000000000000000000000000..ac28d367a9e59679763863a089f1e6e78a5d1dd2 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/7cba097179296102aa62.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/8d8e8248ce11584e8d50.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/8d8e8248ce11584e8d50.json new file mode 100644 index 0000000000000000000000000000000000000000..f8dbbe756cbe456ef31a2131f5ce108c4f79de47 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/8d8e8248ce11584e8d50.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ef1d4a7a1638b14a3c76.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ef1d4a7a1638b14a3c76.json new file mode 100644 index 0000000000000000000000000000000000000000..db327448cff80a66fc9d3548a9a8cf3d3db8970e --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ef1d4a7a1638b14a3c76.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ef6b6bd6744b8a946b4c.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ef6b6bd6744b8a946b4c.json new file mode 100644 index 0000000000000000000000000000000000000000..fdda4b02f47da2159e4e392da6a70238e8951b02 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ef6b6bd6744b8a946b4c.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/872e00b1deeddbe2cc36.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/872e00b1deeddbe2cc36.json new file mode 100644 index 0000000000000000000000000000000000000000..38a512887d2c6c9c58a57dc51cb4f659352ea7e3 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/872e00b1deeddbe2cc36.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/c534b45b477bd1e40237.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/c534b45b477bd1e40237.json new file mode 100644 index 0000000000000000000000000000000000000000..f9ba8374838a1977db1b46a6659cbe66d6b35d5a --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/phi3/microsoft/Phi-3.5-mini-instruct/c534b45b477bd1e40237.json @@ -0,0 +1,164 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3.5-mini-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3.5-mini-instruct", + "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1.0800000429153442, + 1.1100000143051147, + 1.1399999856948853, + 1.340000033378601, + 1.5899999141693115, + 1.600000023841858, + 1.6200000047683716, + 2.620000123977661, + 3.2300000190734863, + 3.2300000190734863, + 4.789999961853027, + 7.400000095367432, + 7.700000286102295, + 9.09000015258789, + 12.199999809265137, + 17.670000076293945, + 24.46000099182129, + 28.57000160217285, + 30.420001983642578, + 30.840002059936523, + 32.590003967285156, + 32.93000411987305, + 42.320003509521484, + 44.96000289916992, + 50.340003967285156, + 50.45000457763672, + 57.55000305175781, + 57.93000411987305, + 58.21000289916992, + 60.1400032043457, + 62.61000442504883, + 62.62000274658203, + 62.71000289916992, + 63.1400032043457, + 63.1400032043457, + 63.77000427246094, + 63.93000411987305, + 63.96000289916992, + 63.970001220703125, + 64.02999877929688, + 64.06999969482422, + 64.08000183105469, + 64.12000274658203, + 64.41000366210938, + 64.4800033569336, + 64.51000213623047, + 64.52999877929688, + 64.83999633789062 + ], + "short_factor": [ + 1.0, + 1.0199999809265137, + 1.0299999713897705, + 1.0299999713897705, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0499999523162842, + 1.0699999332427979, + 1.0999999046325684, + 1.1099998950958252, + 1.1599998474121094, + 1.1599998474121094, + 1.1699998378753662, + 1.2899998426437378, + 1.339999794960022, + 1.679999828338623, + 1.7899998426437378, + 1.8199998140335083, + 1.8499997854232788, + 1.8799997568130493, + 1.9099997282028198, + 1.9399996995925903, + 1.9899996519088745, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0199997425079346, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0299997329711914, + 2.0799996852874756, + 2.0899996757507324, + 2.189999580383301, + 2.2199995517730713, + 2.5899994373321533, + 2.729999542236328, + 2.749999523162842, + 2.8399994373321533 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/8a091d6450025c0ee8e6.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/8a091d6450025c0ee8e6.json new file mode 100644 index 0000000000000000000000000000000000000000..01518f012481e23b263f89b39e1f123dd2a2a9a3 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/8a091d6450025c0ee8e6.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/c616b83f7e861571a5a4.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/c616b83f7e861571a5a4.json new file mode 100644 index 0000000000000000000000000000000000000000..2d6cfab21f3ec5749cdbdf6dc0807d2043856c2e --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/qwen2/Qwen/Qwen2.5-0.5B/c616b83f7e861571a5a4.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/15567820f0729bb24a50.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/15567820f0729bb24a50.json new file mode 100644 index 0000000000000000000000000000000000000000..14dad1bb2bfd9559dde4b85cc829a535a9d35003 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/qwen3/Qwen/Qwen3-0.6B/15567820f0729bb24a50.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/712a28fb50d8df9b8b7f.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/712a28fb50d8df9b8b7f.json new file mode 100644 index 0000000000000000000000000000000000000000..94af2e8e56526f1cbe8f13c0388ea0f53015914b --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/712a28fb50d8df9b8b7f.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/adef3b89bdb4ad2d0d52.json b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/adef3b89bdb4ad2d0d52.json new file mode 100644 index 0000000000000000000000000000000000000000..aed889648680ade9c87f0c843f03a0f0b7f87675 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/0_REGISTRY/0.4.4.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/adef3b89bdb4ad2d0d52.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.22.12471.0+b4a00d10", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev2", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cf5c72cdbd3b1e37ad56ef19847c715e189bdff6 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f254aa5169922e4a556d5a930e32e0a1463be42f650836f188c610b69536e08 +size 2021465 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6b537b413f2f5c1ba7650880bdb315ce623a9f8f --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_08acd4bc4807cfbcd65d+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46dc9327f437bc15d6f0fff9482b9e27802ee20235745f2076f149d114adf38d +size 78152704 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bdf724c695a238fb1be85a19f1572c430ed366f0 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aeecb8b72e16e007ce88f140297581b374655883620d17ff7ccc341c5fcf93b +size 2793330 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bf90c318e3c452d90a61e6fa5a326a01c4d61f97 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_111230f06661c0337f18+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcbbaa33eadbd7c8bda67a6084a8e9bcbd211ff2ae7b3b5e11f0b80ba9ff1b15 +size 10056704 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1b36ad992fd7538d6d30a99803405b303577671a --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48920e8d4727caacdad326f16d9eeb776ff49648e0879c34b0e6fe3e1c0f3928 +size 3686362 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..867f89555619501129f1addedbde6c04bb6491ca --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d345b1b97695a0d489c1434dd410bd44e8d28eb294b3fb70a4426384b2116eac +size 7353344 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4e276334db911bf2465d6e4b38b19b4f337e06b6 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_146d6b04dbd7dcaddb2c+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5be53ddc522de17b61d309a7fb129b1dfac4947e9987183e6709fa7f364e110 +size 7538083 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b45632a88069c0eced9e00af264ff8edab851470 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc2a1822446be22787aba630ebd3afc4eb3886dd20c176a884e09ad0661d6b7 +size 1035415 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5422a020e2f132a56ca75a2e4c21a3ac1f2d22ba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_1d6d6a1a61df4c051b29+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f96486b3fb1c0c2326987a14670904a132c42119b7d647da93a3ef2b38b86311 +size 4353024 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ffffb2d8bf1ad71822d23bd8ad8dac42ae296653 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55ca1352e80d775467085fab3aeb708b87f36991351f814428c965f4f84b059 +size 1244122 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3b9ecd0e59e4a291553085cb5d07137c9641e429 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_31f6e41d55f1feb6406d+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a14e8db205b57262891b93d28ff6247fafc2a578d1fd2bf264236c1b2a8a9f67 +size 10650624 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..c27e6c39a7a4c3b736e614d7725e8bfa29486eb1 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/speculation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d280ba7f0e17b145abfd5485847abb715424e4b2 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adca90e5210fcdc89499b0349cd070a5f972a66f6b16bd353b7ce5e903dc065e +size 1495241 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e6a185788ffdaab7a948da1a7e3fb609a9f87ebd --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3d472eb72271db1b3ca1+0788fa03/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6821ebb6adb793c7f4b0c1c475c8868824ba9365e604d9cb2d72c406fbecd4c7 +size 4035584 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..943c277f524369d08b1593435158d22b010b2545 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2f55db840f401fe0331fad055188a790623646c91990bf3de0a4e19ae93d364 +size 2238280 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f2911abada7f0ac9f96e062cc951d3fe5ff9cbb5 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_3dc9495d24b0d51b218e+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:625877d211615caaf1d259e828e5501360fc805137d3c51ce1ec6c5ea603301f +size 41278464 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b9293911bcb6d3dc609bb9126da37cda7f737ec2 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7697e4b6500476b8dc53175bd79bada55293fe4fb521705d95881c51397018a4 +size 1382670 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b6c5cf68a4af0077e117dcc113f89601f5538408 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a1318f517ca14130ca93f1baf7740027118a1e5c9d79108b66d6ce8f527678 +size 2110464 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..16599cc0982ca099030155b2937cae3feb8bf91c --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_518a65569b3768d4b20e+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d8df7b86c818ecdfb24c78fef32cf4d170b2329eb59ff074c8cb56af58bfa2 +size 2203380 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e04a8ab012a1ef78f0af1f64ce14e655bf966680 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2973d0dc31b736156f70843eac62b20e2bf0b71bd1b7d8d7c5c9b329d0f9135a +size 3085474 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b58e51aecf7f9e878f112f57e7d6364c1d084390 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251b5f5172586646e1301b337107620a5a28cc011e163f948f0b65b1aa9540ac +size 5704704 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..10025720172994f8c42ada5edc9401cae4e2cd18 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5261a95c60daa45c8ece+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7969db0870f49e1826477e2edfe64b777943e574f077b02890db1f8535d56682 +size 5871228 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4b3affa7813dabf1502736274f85fdf7adabac95 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c228c3baf84d40bcce2b9b38a041ead1f84202be8afbd67bdf2452dd7b064260 +size 1229072 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..566b8aa0932b784e3495c7768a7f435d7a9e910e --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_542b091498df3056d0ea+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:015d802bd62aa50ee67be9b38135d39c922f1e94f51bc8f93d94937bb99e0350 +size 4035584 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f61322db89cabbc798f672ac5cac827945efb4f0 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f61e3902b4c583dab5664b1c4b9d2ab4d23f0dc61fab9157ce21fdb24d28f19d +size 2510366 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b7e78273f99d3369846f5b26caea495176965bb3 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16e22d031324618a49012f2f4b26b9decc1ab7f7b276d4fa0be7db9bf7940bc0 +size 2622464 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..fe33827c2bff580307425b1870e0437373a7ece2 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5752aab65a36bdd0d90f+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62b12d57a730c4c9c311c574e8f10c995ee37cd7cf02cc069cc77e098166c13b +size 2759632 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..85a3aec808f76dcef8ebf8b7b93b9678eeba6125 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c50c56a0472ed2113834bba8eef8481fbeb1ee48fc5bd86fa3ac0265e312882 +size 2155363 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..93bb2b7f339d029b34a512bb890166b6a3e4d684 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_58cc10b2390031bade01+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7895635b9103888d8fabca92d4905eea2d5246c8dc4d49a5e6f1eba0e81ec0b4 +size 5612544 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c8016be3a38d0a6265c57c918edd2fa717716db0 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be004f3341d556e60c457124631ff44ee3229adb6e570c952b91eaaa55f05689 +size 2417495 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..95bd45741ecb259429ced16a1c60618d05b08ebd --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db1a1ea794c274f3b0138ec9bdea43f4f7691f4c17a958acd8a03ccc55408544 +size 9759744 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..59760be92751e94bd17d74d0aadcdae93b1a4734 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_5cc3450d0139cc96fc2f+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67872a80c10da000b0ac55e1ec7363da3a14a03edbcea617091ca31df14fad6c +size 9906760 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3fb0a97d74d93812634ff0614e0acd3fb7466869 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c2a5a48aff909d22c1d66807b5c6d879653d3f92fcb25bc8f35f861faf65cd +size 2435751 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2617cf962bd7f7d05162e92f5f25a0b684dac012 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5a586ff33f6b15e6bd7eca107fe3bcc484c2cc9789e48165cc6ddf41136a59b +size 4025344 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a9638dd965d32a77bde02d1bcf6dc116c0ea3d69 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6278a2b3bc7ad7623be9+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed8b4d9f898a419532fa5b093fdf43c9917361705551c23faff1f6164e1af3d +size 4162401 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..493d7909c0a0eaa46cde458ad872b2c69060d4f9 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3267a3decfc26ca238d019d6b54e64c56e64246e32c0fa1e106ede2ebe566f +size 1452599 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a3befcdda68cad1a05a461c5a392fb1312c80e4b --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bf1b0426bc35bc76b9aff1487c2fad7408900c74327eecdd001338ebb25ab24 +size 2591744 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9177d8567c0c694750022b9749600a7e2871f551 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_642f85e96612d2a04ae8+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c5043e4a37ee8b9c4413593a391b30d698dbf3a874e99f7c102b1370671ddb3 +size 2665831 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2ab41fd6756b8d5270aac8b08296c2e4ff732e89 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed10d67c7746a031ee48d23caf2884d5eb253c37ae590155accb16882042c5b +size 1390782 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2bf968ed145df7900accd84fbd3393f7bdbef916 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8faf4aabd1323d8efc4f98bfdf2ce867cd0d2a7ba05ea8e7c6b1803ee52fd3 +size 2110464 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c2765024f7afc3e806df1d1b0eeaef9f705f15e7 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6c9bb6718daf6128f1f9+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e5501699c4cc482e559ea40caa906c4c4e0da34f7d5a77c5ba20b4738b8a95 +size 2203380 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..becc8801470145c7781acdc853f5408cfd1d19f8 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc0cd3818747c6cfe6dc358e24141d59c5d305229af98c0e64e12fc14d2893b +size 3367384 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f6f103ba3bcf6db85d20eb4fa4d81d098d6f9e9e --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6dc6f5ac3fbc7e2f9ecc+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:919597afbfd1f495c39aa163b36cdbfd56ba88c47efc48dd0a691f3b9d6277f9 +size 3677184 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6f256488cc351931f6a4f10112253b640ec3427b --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a4d40f0d9e600a783f90bdd0305ebfd70930c1b38a04df89b18eaee30a55944 +size 2223618 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aaff8971b6fc9e25c207379f217385a6400cc3ae --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_6eaf33246f80184d4fd1+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584e69d2a66aea3fcec2564065e7f4c554ec07297b7cee52032cd5c8559d10f5 +size 13097984 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b966bba5b379fbde80e1960004ec5a05ac7b02be --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a5acbaceb97a6969d89061b06a6b84735717ae27be30a785adea3e6fe61310f +size 3154750 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..20e2a704b0ba6ffe9f968a0f1d328605a9410ac5 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_79d57ef533134608d5c8+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ea3d39541d5bb6875cc230c819d2dabec2b259c488a958b4a59ad84ede83874 +size 10650624 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..86c5617bc31861715f83fa6429fbd9ed9b500838 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:246234588e9817c5ce8e1e855897f13c54ce5fde01d1746cf5f2f7f8d4d27ebd +size 2079519 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9f30c3746af353bcee28c11d2332190b3c94728e --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_809c530e776ad7a7e310+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b1581d1bfa6cf70d09f0fa93a5bfd7b79a2ec2fa2f82239ae81b7dad63301b +size 28017664 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9750a410dbfa6772c1f9+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9750a410dbfa6772c1f9+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9750a410dbfa6772c1f9+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9750a410dbfa6772c1f9+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9750a410dbfa6772c1f9+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9778f767389ef3b80b2f140dff1d8b017705905e --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9750a410dbfa6772c1f9+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:045645e64d810f85c0db20a7d1cdefa456674d58a9af754e1e55318634b38a77 +size 3056674 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8a5d1158b5a04f91e7cd2bf0920d6af47a766d70 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2f73ebae53b1b74ec3758090db463e9a712007e68cb26e173113f4f74911589 +size 3067478 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..336dcb67da3c02cd0c8b8ebd15417e3d704b4e2d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41bb55d66844617a0c0c911da22321831ed4d928e0e2437c27f45acfda20b8c4 +size 7261184 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..47203f66a7efcc678156dfd1894b154e8578f124 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_9dafdd0d7ec6ae871e3f+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287d64bdf3056b472586de0bdaddc150c54fe0a97a8e58d03468c21de6b010fe +size 7427708 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b8bb914f6cf1ff3c0b28e7d06e8ccac402beb6ab --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a866ff9bc01e35cff2d1b082b65a91e9c4b083269526c158bd36b07f831456 +size 1440787 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..47e7595c8c847773f270f334f293f43bd116b7cb --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843438adc9bc254421be5f6e753b291bb98d4883b5ca9a383c83b547e9a5b5d8 +size 3257344 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..aa2d771569c7df28af12a47a555933667fa4adbf --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_a4003d34c40172cda7fb+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8311fb7d6c190e0b9b95d84d2bc321f6a993582690eee877f72de10479efbc19 +size 3331655 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1b747345918e64789a7a81215abee5fc146efce8 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47c01fcb452925d122aca0f8b3e6542fe938860d50f8eaf096c2b6544c098c9 +size 2603780 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..67f68e210cd8edb99bcc7e61127cd1acfa3ad571 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_aae272017096122407cd+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6817fb658d54e2cb5d7671810f707553e9a11c8b720663e4d5ded5b56ddfb7 +size 24351744 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ecb236a32d48263c71af77009a3864796de88270 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03c0af3f77e20436709250cd93beda007a2ff4196b68776f515d112386a41ea5 +size 2390316 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6e3304b717633045d512f7b757b1c218952a2da8 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9d9e21c05b09ac3a01cdf74d5dbdf461eb02d4bab34eeaf33673f13ed8ce5b8 +size 13671424 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4ac5496cccbc4380bf01affbc91763b2dc12aa45 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ab3e50332a1f49feba2d+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c7ca12c4359f7fef2cc3853df0363b4bd7fc9dd32f359255cbb91ea030fc32 +size 13818440 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..992cd9f9d287e9d1053a417fbe6dfee5f2becea7 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce5417df13ee8df347aed8f282ddbcb2e5adbd89cebf03e1a4407327966e1df +size 2363942 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1e3eb28df562e77b68b47320d8fe8006a3a69088 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_ba94bd052390e13a7dc4+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58a1f54724851ff51444a8bb46f1b82161994472728caa9cbbcaa0a8f70038b4 +size 5366784 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ef9936c92c66fa36106b6f3eb756db04990a306a --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:168f5e02e0176978ce611a495d8893402000d8f7c2a9ce89a6f26c9f38e8f161 +size 2159640 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..48275079cd1e48dd11b1306ead3115b8db5e023b --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44c20987d0a23e3a06e4f1e04cba88a8227fbc7d3f217bd1582300d9227bf93a +size 2151424 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..fd59156bb38911d22e707d490aab86aee54ea300 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_c17c517e865950812f46+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2b88070f30578b67a5a6043172b6d00ec58b527586afd7b72e24f4e0ba61ebf +size 2307618 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a540fe6e11b4bce141dd6481e4ca871b1335fb6 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b03aad212bc6460404f3b56d76f4d62f32e1f442448255a804f86fa2d598819 +size 3699669 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5626d1d6f8278ab9d6510a6e28f337fbcd5ae55c --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:419d519decea640e4517b2a8bfd69f7a5542426a7cb00f8db822b2e8c99a8616 +size 5520384 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cba76bbc898753d4a43a42ae67d0307e81d43bd1 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_d0452314eb37f198d415+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7e52c4e287b2607f5227203bcd19c470f8a645fc9b037d3d101d59c21233deb +size 5704563 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a34bb21dd366ffa924ed7c54d064feb0440d76ab --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ad715051d2acf3e39c3dc5ff057e1de3abbe5b4824fcb13caf98393fbee4f8e +size 2231406 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..89b6f287b5870fcf11b462ff5a3b78bdeaa42738 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff4980ceacaf3d1772032d2700986356f458a2506190da7cd8fdd4efefd2671 +size 1905664 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/wrapped_neff.hlo b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9b08edfdd10d451007b2d56fd336fd9cf402f247 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_dff38de8d9da313a5851+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da01622e916ab3267945463ccd4c540b66865c2f6a1556ab470b8159325f26c +size 2061970 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/compile_flags.json b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/model.done b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/model.hlo_module.pb b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f68646ce53c1fa2b19d52e45aa34ef62a86c7a66 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39a0230458132bb1307ec47224775649d6657b582451a1f14abadea3d7a25aa8 +size 1317647 diff --git a/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/model.neff b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d19ff03665d2d6a7bdd99f9ece67a7f1ae390440 --- /dev/null +++ b/neuronxcc-2.22.12471.0+b4a00d10/MODULE_e4993f7a0e2a6345eaaa+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e3b29e7048913cbc4d4a08a8f8852ef162111f1c9c4a8bd8183d67095b21d9 +size 4219904