diff --git a/.gitattributes b/.gitattributes index 355fedc5dbbfd1c9c6252157e6401d815e2994b1..8dce4e4df85fa03de931eafb9142207cb521ef47 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5827,3 +5827,21 @@ neuronxcc-2.21.33363.0+82129205/MODULE_cb05698d60b06f387ed6+a02c3a36/wrapped_nef neuronxcc-2.21.33363.0+82129205/MODULE_4cfdf541f39ee43e568e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_4cfdf541f39ee43e568e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_8c6b204d1bccf64885d0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/de98fc65f348a2d8c295.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/de98fc65f348a2d8c295.json new file mode 100644 index 0000000000000000000000000000000000000000..557b9696f1d183c593f0988019ce04b3347bdbde --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/de98fc65f348a2d8c295.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/llamafactory/tiny-random-Llama-3/dad125ca8a11952d0a3f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/llamafactory/tiny-random-Llama-3/dad125ca8a11952d0a3f.json new file mode 100644 index 0000000000000000000000000000000000000000..9d131234491cbd7f12b5cfa28fdca0fac321f52d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama/llamafactory/tiny-random-Llama-3/dad125ca8a11952d0a3f.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama4_text/tiny-random/llama-4/cf5cee8c9de5d6faf940.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama4_text/tiny-random/llama-4/cf5cee8c9de5d6faf940.json new file mode 100644 index 0000000000000000000000000000000000000000..b1b2f1715c79b8924d94fb78d4f6e870c0a8562c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/llama4_text/tiny-random/llama-4/cf5cee8c9de5d6faf940.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/mixtral/dacorvo/Mixtral-tiny/3779219bc59a2ae3e045.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/mixtral/dacorvo/Mixtral-tiny/3779219bc59a2ae3e045.json new file mode 100644 index 0000000000000000000000000000000000000000..fa730a4c5315830d770af0e8877d5bc47cf90988 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/mixtral/dacorvo/Mixtral-tiny/3779219bc59a2ae3e045.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/yujiepan/phi-4-tiny-random/2ebdbeae8a00090a231f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/yujiepan/phi-4-tiny-random/2ebdbeae8a00090a231f.json new file mode 100644 index 0000000000000000000000000000000000000000..b29d47ca7ddfebfbe89e6d128deb855c85cab791 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/phi3/yujiepan/phi-4-tiny-random/2ebdbeae8a00090a231f.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/e6f05187d051ac3c3e46.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/e6f05187d051ac3c3e46.json new file mode 100644 index 0000000000000000000000000000000000000000..77e8e597e5d165321fd44ead4c6923513a58539a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/e6f05187d051ac3c3e46.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/18af48e9de9305ddddd5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/18af48e9de9305ddddd5.json new file mode 100644 index 0000000000000000000000000000000000000000..8db090bf48565ead29ee3e2363078afae64392f0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/18af48e9de9305ddddd5.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/80594a5958040f8b1ebe.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/80594a5958040f8b1ebe.json new file mode 100644 index 0000000000000000000000000000000000000000..76aaab7a4e6dd227a55f3a4112c14a6be9f5ed43 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/80594a5958040f8b1ebe.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 6, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 6, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/a5cf4ee087cd9567449f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/a5cf4ee087cd9567449f.json new file mode 100644 index 0000000000000000000000000000000000000000..b9888d121aa489d51a57ac28045928a1acc5fcde --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.4.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/a5cf4ee087cd9567449f.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.4.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb index 910fb21ca78c9b5103637fe5ba28e6ecd2162ea9..e1c7a90ca8e1b18d6f623143b55987c66f3a0976 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a7be97d2012aa41b5f56bf5bf86357a0dae77fd27821fa3e0737aba2fe5521e +oid sha256:8ef50e479badb0312e208336533316393fc797857eb5a2cde744753c266d7f3f size 97794 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff index 887c66d40f263958cddbd9115ba918fa59003dbf..e4bfe59d6155f39fb2dc7b02432b14d2efc62266 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_01328268a19023ed0f24+f8e6d902/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ff6c1f2a8f76328f3ffd5cd9cbb7ad62552ac54af96be0b68b754f3e79f2c6e +oid sha256:671f2968ee09229cb96bb22be152df8bd197829d6ce7ad77ef854a3963b716b9 size 410624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..64af5bea5c318d4e8d61ab32f5b2ebb8573ef3d2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aeec7d218e22beefd303645f302d7a805df96146ecddac686eb936f8d682209 +size 84114 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d388eded7527420d1e7b7407f06567aeec76ec79 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f7732c3ad2ac92d6eca8c6efd83baeefa27be9061021e79a069e2496d6faa00 +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..854b659d69603fcbde58509631b3a7b3a07079b5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0efb0afa4ef56e94d498+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9411f69d1d13a85dfcf64804e552e540bfc24470b7f2b52f419c5eceb8f6357 +size 254967 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a6b6a691c57cebfa27e38dcda056b83c29c21952 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:948a51e7715bd8232b0ce82f8cca432d73c2a709e6a794d3a228525868c057f7 +size 93425 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5a367cc0501399559e2e7a7119a185b4548b2737 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c30438f983884de1b30e5866ed44b55b755f839caf513e2312cbdfefc8eda3 +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5a8c8f6e4d486e56522c1592c3a2f19aa44d8039 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14cda09d048ccfb252f8+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbbb4dba625a46bae0587ff94ea66196c0ebfd2bfebd5b2d8680fa00fcf7292a +size 288898 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..877edca29ae13728c4b62afa159c12473183d794 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc5d22607282b38e6ccc318f3d4d6f232ae382dbbf78a4b7ff65a37c4cdfbad8 +size 82653 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2b7cd26b2e80d596ea1b2a6e20b8f7d168f57971 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb1c68e78267670445d5a05741b98754ca8194705a9ab1bdfb3ce7d44956568 +size 216064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7d2ba5eec37314f84f55d7e9422159f3dcef4300 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_245d388b7b8c64a4d846+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:780a4a6a0d57688dd4bc494548c72ecaebbfb862d6d3e880442cc279fd2d9437 +size 224275 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb index a151e0db66c9936e4eb4c88ec4b2d7c748e10e2d..d396e291c54b3b1e1c83a9761d827ea5c2b5a5a3 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b4b6486a12378501e2484cf81b9f6885eabca76a7d817e3e2041c1736bac16d7 +oid sha256:6b56b0d331077542e72e5da19741dfcd8d5d6bdd01d35c1cdbe74227327956a6 size 82772 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff index cb31650fc005c2de4ee977a846b33ff3a5fb7cb3..24f815ffa5bd57a7edac132c9d55d934ee370402 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2672b897bf6327bea9e4+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c65b6b3a5f7c34ae231694981cf76347fb89393c25990695cd4f921f3d243e3 +oid sha256:d1cd6ca36926fad065cd480b6e705f247170aeb533426465d808212c1b7a5123 size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..715276b152d1aeacefa3ce59da9fa1ce7dc8a9f0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d63d7e965d9319351949e66cbad9a3c799e1687254fc9e9b2a96c87adb32fe +size 865291 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ff98c6bd8edac01cae66e383e13fe31008b6d413 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b4f8ca1cd6dceeea9e1f3cab2cd6d38961323966034736ce0190d3011373fd +size 4967424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1844fb78a1acce2eac236320d48e2168e388de60 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3a758a7122ca68a1243a+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c379355b24f769c6ef1b9dae63ff12937987e7a12f79d81471854f83c88f565c +size 5133948 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.hlo_module.pb index 96b0b9d0aac667bdfb4b89f452b77fd0695d787e..dbe1f9441e2a7b2c4185bbafced0748b740f4d63 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76c10df9f9fadac3ce5fee74c4470b2cdade440cf97718b4545a4e3de7fc54aa +oid sha256:5231390fb7b7b3c9416cee6d063cf72f57c3e66de2333a27302fd7480aad917a size 694128 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff index ea50519fb23dc2dea11f1fddc04774675adc6431..b4b9c29f0411998756002266e85b7d95ef151b1d 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5960bdedd7549bfacf7a+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f816d25661f62f7f838c0800cb426bb2a2a552c974bc9c93181f83780ef36d8 +oid sha256:1d609c20314399dc90d7d3efc2f573991a8799ceb9c8c13a0528387d15454663 size 625664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a6b6a691c57cebfa27e38dcda056b83c29c21952 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:948a51e7715bd8232b0ce82f8cca432d73c2a709e6a794d3a228525868c057f7 +size 93425 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3efe21e0a6fd6572539bbfc93fe015dadd456b17 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aae99c7c352a3bf0003fc6a1d3cf17bbbd7360829224f73be9a92a36fca32245 +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..04357ca8bf7906cf10221628f27f6fbbeb316006 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5ee5dade1050cf89f316+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02fc3c1e2ef02e67ce1ef2daf774a482f12c32bcea4cf8e8c91ce3505d1ee588 +size 288898 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5a228a7a4cc7c7df9e8b4887d22dcd862c2f60f3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "-O1", "--lnc=1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--internal-enable-dge-levels=vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e3e15a31cad10ce1cb55cdd26b518a6a5798991d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c364e560eed61f02284b32afbec9ee92735a7fc75f5da0fdebf4e6f62981de +size 91833 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7684292c7afed82987849f4c30f477053b699538 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac0a5f8c623e69c32df0f65a514ae1f29f63c1e4629f5fd16842c7f8630970cf +size 369664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..21224147e96310b42c09e7fa413d89c7544b29e8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_864a283be77e0814947c+bafdbdde/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad55b6aa538df835c51877539b04aa7ec79969d2cfbbc39edc3ecc7feb5ee267 +size 379225 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bb0e723ce8d76544eb2e2c0282427d561fb45313 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fa3d25bccfae87c411912a1318f7b09223d1a267d2024c09aef48a926aec16 +size 85024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..980554b830b79d6043b0e66f12cb5da3231e5312 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1060286be5259ba3f3a1e6e70edf309b3a5f9c2b9edfa4b7f049ca9554af484f +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0956f642a1c56013de60d20aa462a2a9f37e3282 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b2505504a54575836b0+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:859951631f7283c904602b69178d2c25c5a5ea29f500211582d11fd68fcb2cb7 +size 285717 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9d52760610a001af812a+f8e6d902/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9d52760610a001af812a+f8e6d902/model.neff index c64a91bd3edecb0e67429d35330d7e0c16e8b16f..0ce3bfe677e37bbe8937405949e98969e95b3235 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_9d52760610a001af812a+f8e6d902/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9d52760610a001af812a+f8e6d902/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e884a06795f3dbe019d77779bf01db493d426ba8ec0ef1f7465464fa9d361a12 +oid sha256:0b32b26173495629dd59108bcb4f98b21a4b73aff9e6736bdb979ddb2c118126 size 646144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.hlo_module.pb index b928c9a2f728815784f9ace1a24d0c5aa729fc33..748c409b8729fc9463051767dad159bc8d1c7cdb 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f1d26ab78f5dcdc49abcc6e752412007c6cc8d21445bfa1a0078c7ec9c8d610 +oid sha256:fdb4342d18a3192874a17bcc4ddeadc592624a4db04b0444c931b0f2dc4e6d9b size 761066 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.log new file mode 100644 index 0000000000000000000000000000000000000000..ca4a6cbfb4540f166b46b77700bb8952d3180c7f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ad7c8ac474ec4bdeae52+24129607/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_ad7c8ac474ec4bdeae52+24129607.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_ad7c8ac474ec4bdeae52+24129607.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.hlo_module.pb index 2490c1fe7fef9d747c24ae4daa81eb948a183fc1..32942c066618e9d65cb12b4e62d9a5097ba42e3a 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba30bac1303e4d846c40c65f8e4d8a644794b6964038372ff73e155714762bdc +oid sha256:eb45ccdda05a2a2ee1cf89ec265cdb0766071dc0512a114d5a7a007c1d523b9a size 83504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.neff index 3612ae764be895e726e58f5ca7f02b1d988c5d24..0bc673303d2f6a85cb41332e26052b5a2a238a8e 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c26856b929d58ebddc23+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ba77eb824772e994518cb8cae28e31fdb69af6393a0fac70a28fb9d3d5f6d51 +oid sha256:36c9055608092f4be44de678aa77e873c858f75429d901c3851babbebe581dc4 size 328704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e624d0856e1911e8f657fa16776c6e8f10644f9d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab85d7c603bb13b73b521000a86de4160af14d09b8b3dfd1b565eab024b2d717 +size 678755 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..603e263e7639033cdd42450033aa62f7dc5ec10a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b4bf1b2d082b28f1532f71605140f064d6721d2350ac17f4a374fc75113a5a +size 533504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..52eb079cc3702c1b058b37a09d8bf93b3c461418 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c4913590384814c92cb6+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:132cba03b88b2eaee37dea1292777e2b0f326225db8b726f7525666ba3c05d80 +size 553005 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.hlo_module.pb index bc9cfc879012a007d2e0baa000a3b24812f240e8..28ee1398fee500d4fd4ce4bd55db6a4f4eb78dba 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:488ac1572ce8e1b1ff2d6254afaf39e40218d5b117f226473da52d9ac7929c91 +oid sha256:5d12d23545ccb2c03c3978d33636c9589b7b6da434697f7b5507f28fd0062075 size 81516 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.neff index 340e7cce3851d2abdb5ee14285f6645d1dbe5088..0ebe76da33da7f535289f868ecf2170765c2801f 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_cb6b880fc977b7ae4bda+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50269c93a4f89b9cfe1a88721a71f9262fee57c37777da36ef0127d536e4c0ee +oid sha256:1bede074a656737d1b8bb3a4c99933c04b3172880e2528fa29965fad3b087226 size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5a228a7a4cc7c7df9e8b4887d22dcd862c2f60f3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "-O1", "--lnc=1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--internal-enable-dge-levels=vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f747ccdad84657b0dc17e367c0ea5f3f76f7a00f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a329d31fe8f44a19d767432740ad0fd5b4a5b395f46b99f9b906fa11236932d +size 72553 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..10ac39499db5b11276567f3694bf56c952163ffb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ff84d0ebb6a91826b2fe2bc821051669af29385e9536a2ca77f1dadb3b332e +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0943b1436c12eae63036aa23109c3c2a742517a8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e0ccc071060e441622c2+bafdbdde/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af26fdd7236eae7fb1f1b53da776d0b43f8c1f35f80d9d81b58bedbc31ccd442 +size 289434 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.hlo_module.pb index c8e158507cef0abd568e18481672a47be580132b..cf7107baae9339edc2d8373b74081ff19581db39 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.hlo_module.pb +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dbee7375c8021657bbb337b137190e79d46ae75cf40db4df9597daa481da9e5c +oid sha256:1250088885cd64fc4a0be3a10d2f8222c991a426632b4d7a86c18a890d6232ff size 90382 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.neff index 638d8a898464a976db0604cd8306a7f0af1c1262..868cb15bc41071fe8d10f01f207a994c89b1e9f5 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e899b698a237c528bb16+24129607/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:30550222149aae50b173e32adaeec5e384280fdc8f3e65aa0d0a6c4ee049106d +oid sha256:de7fc6b6461262a518d9b4e6d67c1c6fe943b04806e1f726deaf29d599cb1911 size 359424