dacorvo HF Staff committed on
Commit
9319d3e
·
verified ·
1 Parent(s): 76721b6

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +21 -0
  2. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/180ffc12360632ca0552.json +58 -0
  3. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/4df385f6aeaa42fb9756.json +62 -0
  4. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/b787c4dd68458fa32228.json +58 -0
  5. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/2502f9dc059367ba4857.json +65 -0
  6. neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/compile_flags.json +1 -0
  7. neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/model.done +0 -0
  8. neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/model.hlo_module.pb +3 -0
  9. neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/model.neff +3 -0
  10. neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/wrapped_neff.hlo +3 -0
  11. neuronxcc-2.21.18209.0+043b1bf7/MODULE_24cfc117f33f64ccf2cb+877608f3/compile_flags.json +1 -0
  12. neuronxcc-2.21.18209.0+043b1bf7/MODULE_24cfc117f33f64ccf2cb+877608f3/model.done +0 -0
  13. neuronxcc-2.21.18209.0+043b1bf7/MODULE_24cfc117f33f64ccf2cb+877608f3/model.hlo_module.pb +3 -0
  14. neuronxcc-2.21.18209.0+043b1bf7/MODULE_24cfc117f33f64ccf2cb+877608f3/model.neff +3 -0
  15. neuronxcc-2.21.18209.0+043b1bf7/MODULE_2ac190bdc4561a89b8aa+5ba4a294/compile_flags.json +1 -0
  16. neuronxcc-2.21.18209.0+043b1bf7/MODULE_2ac190bdc4561a89b8aa+5ba4a294/model.hlo_module.pb +3 -0
  17. neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/compile_flags.json +1 -0
  18. neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/model.done +0 -0
  19. neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/model.hlo_module.pb +3 -0
  20. neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/model.neff +3 -0
  21. neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/wrapped_neff.hlo +3 -0
  22. neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a2b64355e0bb619a721+877608f3/compile_flags.json +1 -0
  23. neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a2b64355e0bb619a721+877608f3/model.done +0 -0
  24. neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a2b64355e0bb619a721+877608f3/model.hlo_module.pb +3 -0
  25. neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a2b64355e0bb619a721+877608f3/model.neff +3 -0
  26. neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/compile_flags.json +1 -0
  27. neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/model.done +0 -0
  28. neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/model.hlo_module.pb +3 -0
  29. neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/model.neff +3 -0
  30. neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/wrapped_neff.hlo +3 -0
  31. neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d90bc000559d461c79f+564b7b5b/compile_flags.json +1 -0
  32. neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d90bc000559d461c79f+564b7b5b/model.done +0 -0
  33. neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d90bc000559d461c79f+564b7b5b/model.hlo_module.pb +3 -0
  34. neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d90bc000559d461c79f+564b7b5b/model.neff +3 -0
  35. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7387ca39d94080c41e25+877608f3/compile_flags.json +1 -0
  36. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7387ca39d94080c41e25+877608f3/model.done +0 -0
  37. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7387ca39d94080c41e25+877608f3/model.hlo_module.pb +3 -0
  38. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7387ca39d94080c41e25+877608f3/model.neff +3 -0
  39. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/compile_flags.json +1 -0
  40. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/model.done +0 -0
  41. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/model.hlo_module.pb +3 -0
  42. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/model.neff +3 -0
  43. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/wrapped_neff.hlo +3 -0
  44. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/compile_flags.json +1 -0
  45. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/model.done +0 -0
  46. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/model.hlo_module.pb +3 -0
  47. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/model.neff +3 -0
  48. neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/wrapped_neff.hlo +3 -0
  49. neuronxcc-2.21.18209.0+043b1bf7/MODULE_a87943be1c24879a03e7+aa7059d7/compile_flags.json +1 -0
  50. neuronxcc-2.21.18209.0+043b1bf7/MODULE_a87943be1c24879a03e7+aa7059d7/model.done +0 -0
.gitattributes CHANGED
@@ -5296,3 +5296,24 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/model.neff
5296
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5297
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
5298
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5296
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5297
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
5298
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5299
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/model.neff filter=lfs diff=lfs merge=lfs -text
5300
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5301
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_24cfc117f33f64ccf2cb+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text
5302
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/model.neff filter=lfs diff=lfs merge=lfs -text
5303
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5304
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a2b64355e0bb619a721+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text
5305
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/model.neff filter=lfs diff=lfs merge=lfs -text
5306
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5307
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d90bc000559d461c79f+564b7b5b/model.neff filter=lfs diff=lfs merge=lfs -text
5308
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_7387ca39d94080c41e25+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text
5309
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text
5310
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5311
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/model.neff filter=lfs diff=lfs merge=lfs -text
5312
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5313
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_a87943be1c24879a03e7+aa7059d7/model.neff filter=lfs diff=lfs merge=lfs -text
5314
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_a87943be1c24879a03e7+aa7059d7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5315
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_ceeb2de518f3f121f9df+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text
5316
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_ceeb2de518f3f121f9df+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5317
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_f20adcf25e6f9d0f0500+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text
5318
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_f20adcf25e6f9d0f0500+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
5319
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_fccf2ddf2bf73205ff5e+be13b572/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/180ffc12360632ca0552.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
25
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
26
+ "continuous_batching": false,
27
+ "enable_bucketing": false,
28
+ "ep_degree": 1,
29
+ "fused_qkv": true,
30
+ "glu_mlp": true,
31
+ "local_ranks_size": 2,
32
+ "max_batch_size": 1,
33
+ "max_context_length": 1024,
34
+ "max_topk": 256,
35
+ "n_active_tokens": 1024,
36
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
37
+ "on_device_sampling": true,
38
+ "optimum_neuron_version": "0.4.1.dev0",
39
+ "output_logits": false,
40
+ "pp_degree": 1,
41
+ "sequence_length": 1024,
42
+ "speculation_length": 0,
43
+ "start_rank_id": 0,
44
+ "target": "trn2",
45
+ "torch_dtype": "float32",
46
+ "tp_degree": 2
47
+ },
48
+ "num_attention_heads": 4,
49
+ "num_hidden_layers": 2,
50
+ "num_key_value_heads": 4,
51
+ "residual_multiplier": 1.0,
52
+ "rms_norm_eps": 1e-06,
53
+ "rope_scaling": null,
54
+ "rope_theta": 10000.0,
55
+ "tie_word_embeddings": false,
56
+ "use_cache": true,
57
+ "vocab_size": 49152
58
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/4df385f6aeaa42fb9756.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
23
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
24
+ "continuous_batching": false,
25
+ "enable_bucketing": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 1024,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 1024,
34
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.1.dev0",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 1024,
40
+ "speculation_length": 0,
41
+ "start_rank_id": 0,
42
+ "target": "trn2",
43
+ "torch_dtype": "float16",
44
+ "tp_degree": 2
45
+ },
46
+ "num_attention_heads": 4,
47
+ "num_hidden_layers": 2,
48
+ "num_key_value_heads": 4,
49
+ "pretraining_tp": 1,
50
+ "rms_norm_eps": 1e-05,
51
+ "rope_scaling": {
52
+ "factor": 8.0,
53
+ "high_freq_factor": 4.0,
54
+ "low_freq_factor": 1.0,
55
+ "original_max_position_embeddings": 8192,
56
+ "rope_type": "llama3"
57
+ },
58
+ "rope_theta": 500000.0,
59
+ "tie_word_embeddings": false,
60
+ "use_cache": true,
61
+ "vocab_size": 128256
62
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/b787c4dd68458fa32228.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "batch_size": 1,
19
+ "capacity_factor": null,
20
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
21
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
22
+ "continuous_batching": false,
23
+ "enable_bucketing": false,
24
+ "ep_degree": 1,
25
+ "fused_qkv": false,
26
+ "glu_mlp": true,
27
+ "local_ranks_size": 2,
28
+ "max_batch_size": 1,
29
+ "max_context_length": 1024,
30
+ "max_topk": 256,
31
+ "n_active_tokens": 1024,
32
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
33
+ "on_device_sampling": false,
34
+ "optimum_neuron_version": "0.4.1.dev0",
35
+ "output_logits": false,
36
+ "pp_degree": 1,
37
+ "sequence_length": 1024,
38
+ "speculation_length": 0,
39
+ "start_rank_id": 0,
40
+ "target": "trn2",
41
+ "torch_dtype": "float16",
42
+ "tp_degree": 2
43
+ },
44
+ "num_attention_heads": 32,
45
+ "num_experts_per_tok": 2,
46
+ "num_hidden_layers": 2,
47
+ "num_key_value_heads": 8,
48
+ "num_local_experts": 8,
49
+ "output_router_logits": false,
50
+ "rms_norm_eps": 1e-05,
51
+ "rope_theta": 10000.0,
52
+ "router_aux_loss_coef": 0.001,
53
+ "router_jitter_noise": 0.0,
54
+ "sliding_window": 4096,
55
+ "tie_word_embeddings": false,
56
+ "use_cache": true,
57
+ "vocab_size": 32000
58
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/2502f9dc059367ba4857.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "batch_size": 1,
24
+ "capacity_factor": null,
25
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
26
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "fused_qkv": false,
31
+ "glu_mlp": true,
32
+ "local_ranks_size": 2,
33
+ "max_batch_size": 1,
34
+ "max_context_length": 1024,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 1024,
37
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
38
+ "on_device_sampling": true,
39
+ "optimum_neuron_version": "0.4.1.dev0",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "sequence_length": 1024,
43
+ "speculation_length": 0,
44
+ "start_rank_id": 0,
45
+ "target": "trn2",
46
+ "torch_dtype": "float32",
47
+ "tp_degree": 2
48
+ },
49
+ "norm_topk_prob": true,
50
+ "num_attention_heads": 2,
51
+ "num_experts": 8,
52
+ "num_experts_per_tok": 2,
53
+ "num_hidden_layers": 2,
54
+ "num_key_value_heads": 1,
55
+ "output_router_logits": false,
56
+ "rms_norm_eps": 1e-06,
57
+ "rope_scaling": null,
58
+ "rope_theta": 1000000.0,
59
+ "router_aux_loss_coef": 0.001,
60
+ "sliding_window": null,
61
+ "tie_word_embeddings": true,
62
+ "use_cache": true,
63
+ "use_sliding_window": false,
64
+ "vocab_size": 151936
65
+ }
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_97f69534-a6f2-4995-adc2-d7d40068808d/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35
3
+ size 1165
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd5b096badb6086196ec5a04499d8965d0c198bf3be7cde2e994a3ae44d106c1
3
+ size 123904
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1b54f21d033a73ffbcdc+a3f02148/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0c8403de522d314bb6bf3090240712eaa54c727536d7d054514ff3ea393706f
3
+ size 124800
neuronxcc-2.21.18209.0+043b1bf7/MODULE_24cfc117f33f64ccf2cb+877608f3/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_24cfc117f33f64ccf2cb+877608f3/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_24cfc117f33f64ccf2cb+877608f3/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35a6eef6ba0f2f8f2e5b2b08d0b1267a1cb9f5d863c6a9663b8734eaf119d5c7
3
+ size 1299219
neuronxcc-2.21.18209.0+043b1bf7/MODULE_24cfc117f33f64ccf2cb+877608f3/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76d4107a5d0bd4ba3554f897e6813755bc970d8a204e1d6b43c18a6d45b9017a
3
+ size 656384
neuronxcc-2.21.18209.0+043b1bf7/MODULE_2ac190bdc4561a89b8aa+5ba4a294/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_6b4d1de6-ba8a-46e1-b742-8132c4bd3bf0/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_2ac190bdc4561a89b8aa+5ba4a294/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044
3
+ size 8979
neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c051e4879cfd8bdbf2d56c9246144686cdf549884c14709e9a26b9a0cc7f646
3
+ size 1338548
neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1976a4dde9fde2a2b9ed5c9e9560c7003622dd4a301111dd8cf47e3dad9c9882
3
+ size 646144
neuronxcc-2.21.18209.0+043b1bf7/MODULE_48cf67a7f3630493577f+d0d57c8a/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5471821b8db21496fcfb39dc4a8b0b5aa7fcfe98b7480c8497f8ff717f97bedf
3
+ size 655842
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a2b64355e0bb619a721+877608f3/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a2b64355e0bb619a721+877608f3/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a2b64355e0bb619a721+877608f3/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b61fff4fd459f6f5b9436447069544757836fd8c198c2d87f985898effb2e01f
3
+ size 1124430
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a2b64355e0bb619a721+877608f3/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e330b18cc3560dd7ac1256b72ea18762742985a49cf46e59aa6ae1b75a1ad73b
3
+ size 451584
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec57d9d2f9d9be197ad88a58ece6b14b0ccab97c121b8d20cc9512558ddf6562
3
+ size 70276
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddafe107c9ccd625e44c60097b05c006d5591a36d01e1d8e997e8b69be0fb2e1
3
+ size 431104
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+0b01cb42/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5a1335ac17ebfa49e49622676cbc6563c7b29018ab34cd018858af4a3aac85f
3
+ size 443199
neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d90bc000559d461c79f+564b7b5b/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d90bc000559d461c79f+564b7b5b/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d90bc000559d461c79f+564b7b5b/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b999ea9ba89a487255da2b130a2bdbb271cf57445eb26ac94e78bd297c00b1c
3
+ size 1365236
neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d90bc000559d461c79f+564b7b5b/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efe57e6c82ac454d00dff3baccae6b36d5841c3b369aa030a6414bb8d4e02a9d
3
+ size 687104
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7387ca39d94080c41e25+877608f3/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7387ca39d94080c41e25+877608f3/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7387ca39d94080c41e25+877608f3/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77782fdab9f56144e7d5c218eb8e23aa06187b6bfbc1434af59c8ae366258595
3
+ size 1302867
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7387ca39d94080c41e25+877608f3/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9faefa6276b0561549b3f3ab9f7cb04fe7d6b312259195c30c152c84578139
3
+ size 646144
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee4d5ebca4732581ccc5392f3c3130051c9b01dc4710efd3ec99ce56f613df40
3
+ size 1280147
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfbe9cc8894d956087bc7c3317a0df313f6e8e4b49e74b38669803c9d2120b03
3
+ size 594944
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7af2912e9b351fcd249a+747527b0/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2d8671898a565e03f8068814a8d8637b8ca58e9839847b9f7268e52f5c7a6ca
3
+ size 602863
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_c1269219-9671-4c14-9da1-4397b35e378f/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044
3
+ size 8979
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4188259ebeabb332ffe619985d7daaf343f25e4649408ac88e022cd2e0a3ae
3
+ size 277504
neuronxcc-2.21.18209.0+043b1bf7/MODULE_7c1663040965c8138539+313ee46c/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d5187b88130966d0ada116d44c427f93585ca2c0ac12359a54ad0efe396e843
3
+ size 280328
neuronxcc-2.21.18209.0+043b1bf7/MODULE_a87943be1c24879a03e7+aa7059d7/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_fc8e4646-569f-4184-bf6b-d8e0eb33849c/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_a87943be1c24879a03e7+aa7059d7/model.done ADDED
File without changes