dacorvo HF Staff commited on
Commit
9c18777
·
verified ·
1 Parent(s): 3c821e8

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +8 -0
  2. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/7ba8c743ae51687c222d.json +88 -0
  3. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/b38c4dd37be80535e108.json +88 -0
  4. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/01ab39323144501d6eaf.json +88 -0
  5. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/a28e46704c048d210631.json +88 -0
  6. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-0.6B/a28e46704c048d210631.json +88 -0
  7. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/b38c4dd37be80535e108.json +88 -0
  8. neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff +0 -0
  9. neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.hlo_module.pb +1 -1
  10. neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.neff +1 -1
  11. neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/compile_flags.json +1 -0
  12. neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.done +0 -0
  13. neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.hlo_module.pb +3 -0
  14. neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.neff +3 -0
  15. neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json +1 -0
  16. neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb +3 -0
  17. neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.log +1 -0
  18. neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/compile_flags.json +1 -0
  19. neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.done +0 -0
  20. neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.hlo_module.pb +3 -0
  21. neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.neff +3 -0
  22. neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json +1 -0
  23. neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.done +0 -0
  24. neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb +3 -0
  25. neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff +3 -0
  26. neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo +3 -0
  27. neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/compile_flags.json +1 -0
  28. neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.done +0 -0
  29. neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.hlo_module.pb +3 -0
  30. neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.neff +0 -0
  31. neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/compile_flags.json +1 -0
  32. neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.done +0 -0
  33. neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.hlo_module.pb +3 -0
  34. neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.neff +3 -0
  35. neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/wrapped_neff.hlo +3 -0
  36. neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/compile_flags.json +1 -0
  37. neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.done +0 -0
  38. neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.hlo_module.pb +3 -0
  39. neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff +3 -0
  40. neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo +3 -0
  41. neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/compile_flags.json +1 -0
  42. neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.done +0 -0
  43. neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.hlo_module.pb +3 -0
  44. neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.neff +0 -0
  45. neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/compile_flags.json +1 -0
  46. neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.done +0 -0
  47. neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.hlo_module.pb +3 -0
  48. neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.neff +0 -0
  49. neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/compile_flags.json +1 -0
  50. neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/model.done +0 -0
.gitattributes CHANGED
@@ -17042,3 +17042,11 @@ neuronxcc-2.21.33363.0+82129205/MODULE_eed91f115fe8c2176712+4394b9d6/model.neff
17042
  neuronxcc-2.21.33363.0+82129205/MODULE_eed91f115fe8c2176712+4394b9d6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17043
  neuronxcc-2.21.33363.0+82129205/MODULE_efe24876903fe1364678+3ad1cb98/model.neff filter=lfs diff=lfs merge=lfs -text
17044
  neuronxcc-2.21.33363.0+82129205/MODULE_efe24876903fe1364678+3ad1cb98/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
17042
  neuronxcc-2.21.33363.0+82129205/MODULE_eed91f115fe8c2176712+4394b9d6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17043
  neuronxcc-2.21.33363.0+82129205/MODULE_efe24876903fe1364678+3ad1cb98/model.neff filter=lfs diff=lfs merge=lfs -text
17044
  neuronxcc-2.21.33363.0+82129205/MODULE_efe24876903fe1364678+3ad1cb98/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17045
+ neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text
17046
+ neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17047
+ neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17048
+ neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17049
+ neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17050
+ neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17051
+ neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17052
+ neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/7ba8c743ae51687c222d.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 6,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 6,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev2",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 0,
70
+ "sequence_length": 8192,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn1",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 2
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151669
88
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/b38c4dd37be80535e108.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 4,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev2",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 0,
70
+ "sequence_length": 8192,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn1",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 2
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151669
88
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/01ab39323144501d6eaf.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-0.6B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 40960,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 1,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-0.6B",
54
+ "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 1,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": true,
66
+ "optimum_neuron_version": "0.4.6.dev2",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 1024,
70
+ "sequence_length": 8192,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn1",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 2
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151936
88
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/929b02754a13cbfdf657d863c3fc6f3bce672879bc6ae48ab45be21e881e9ec2/a28e46704c048d210631.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-0.6B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 40960,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 4,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-0.6B",
54
+ "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca",
55
+ "continuous_batching": true,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 1024,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 1024,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev2",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 0,
70
+ "sequence_length": 1024,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn1",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 2
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151936
88
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-0.6B/a28e46704c048d210631.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-0.6B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 40960,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 4,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-0.6B",
54
+ "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca",
55
+ "continuous_batching": true,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 1024,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 1024,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev2",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 0,
70
+ "sequence_length": 1024,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn1",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 2
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151936
88
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/b38c4dd37be80535e108.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 4,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev2",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 0,
70
+ "sequence_length": 8192,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn1",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 2
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151669
88
+ }
neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_17476209562158013765+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d18c81c6ab7ffca4593ffd12280271d86ca9a2700a2770da93fb314a9109ff67
3
  size 509380
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1e617c5569524a80c9f6d97e87ad348e3c21889de715297f18f50d886f1e81e
3
  size 509380
neuronxcc-2.21.33363.0+82129205/MODULE_196d1b2148ed8629b154+24129607/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45d13b5a6f47ee0e1c7c9b783470687dfcad53929b4035be0b071bbacd718948
3
  size 41585664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2586baa46365618b851f6bc750452d003d50e54b8b7734e3bd272eba298d7065
3
  size 41585664
neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc433e7b703d130e091b01dadff3c52a79577463c64fe2bad4743e10fb5ed466
3
+ size 1011733
neuronxcc-2.21.33363.0+82129205/MODULE_1d8d646ed52cd9b3ce0f+6170d8e1/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91a42b4aaa6cda83c40f5f7d37993ca0c5a1837b5cca852e0144cd487ad3648e
3
+ size 40387584
neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb21ddc44f42e0a984b9c1c6b453bee69944f16edcc891c120a1bf6fc5006494
3
+ size 719476
neuronxcc-2.21.33363.0+82129205/MODULE_1de7a333d337fe35cb9e+24129607/model.log ADDED
@@ -0,0 +1 @@
 
 
1
+ Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_1de7a333d337fe35cb9e+24129607.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_1de7a333d337fe35cb9e+24129607.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-03-10T14:33:04Z [XTP004] Number of instructions (6729320) is over the threshold (5000000). - Compile under --optlevel=1 to create smaller subgraphs or use pipeline parallelism.
neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db2a056d98b98672d37a1e08b48bf6f37953aba9b4c616720de52a80dbeeabfb
3
+ size 693532
neuronxcc-2.21.33363.0+82129205/MODULE_5006ac810070ead7ed92+24129607/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a605026a3de07a515ce0814ecd12488cf6cb11c72ed4113ad5cdde2ebdf566e
3
+ size 5039104
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b82959150728b621645262435120008706a4ef119a943f7db4a6d90aeb430c7
3
+ size 728309
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ed190bc44698b591685d8b6feb0da87c5d1629abe304ab17569a3510a1a0fab
3
+ size 7117824
neuronxcc-2.21.33363.0+82129205/MODULE_68b1b688bc907867445e+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e34e480fdf7a4270bca4077b8ded8f08218d466c960d9cd822ee3690dc8e334
3
+ size 7264840
neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_69445d02-1e2d-4f77-9ed8-c9029402b637/compiler_workdir/SoftmaxWithMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f96487dad4bb02b98bf2c955fe59650a5fdbcf1d763fdf56ec412b62b5774c
3
+ size 5596
neuronxcc-2.21.33363.0+82129205/MODULE_9e22b55ec8c0b02cf4ef+28fcbe05/model.neff ADDED
Binary file (31.7 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eefc21dfba0ef82c19273eba57dd5a0bf0554faaa2d39d11cd99f0c330170b5
3
+ size 735532
neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:120cbfa9f71dd5236635bc11c0b1e835fb832b9c0ec11ab84e350f086e92b1f7
3
+ size 2305024
neuronxcc-2.21.33363.0+82129205/MODULE_a1d5b9d5676277a73661+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:842619c244312132fec3c4e8bc1ad737c011b8de21b6e4a28657d07f6e0b5dc3
3
+ size 2442081
neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64981f2c590ab72d27b23aa6dbaa7507715d804527dff4824f5b8a704b2e29a8
3
+ size 426769
neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f7ccb3c563a4c0fdae234892df68dcd6c60cd41674bf983c7a5693709d2af9c
3
+ size 3073024
neuronxcc-2.21.33363.0+82129205/MODULE_ad16674dd3fc5a51c226+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a6ac7c0556c0e218f9e95cabc999fa5d73cb79aabcc96ccbc57e3c5325e36e9
3
+ size 3147125
neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_4e549635-b641-407e-bfcf-0de9fb256e44/compiler_workdir/SoftmaxWithMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9a258ce111db27dc5ba46ec4f9f6877c56b0777a5534c0eba14488ff9eaf298
3
+ size 5596
neuronxcc-2.21.33363.0+82129205/MODULE_b54a5aa9a7bb0faeb6b1+940786cd/model.neff ADDED
Binary file (31.7 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_de4f3c0b-dec2-4039-854c-30a1f038d511/compiler_workdir/SoftmaxNoMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e542c76cf27c3f807feac47dd3f58fd4c308e8d9e932f0732774bee19d640d8
3
+ size 3881
neuronxcc-2.21.33363.0+82129205/MODULE_e79d2ef691b5cd91e631+d4248311/model.neff ADDED
Binary file (31.7 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_1a5ee6fa-840a-4fa0-b1c4-3b83c44912b0/compiler_workdir/SoftmaxNoMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_f1638265f2796bf1b8b8+81944ebb/model.done ADDED
File without changes