armwaheed committed on
Commit
71582da
·
verified ·
1 Parent(s): 0ae009c

Adding ONNX Kleidi support

Browse files
.gitattributes CHANGED
@@ -4,5 +4,3 @@ text_encoder_2/model.onnx_data filter=lfs diff=lfs merge=lfs -text
4
  text_encoder_3/model.onnx_data filter=lfs diff=lfs merge=lfs -text
5
  tokenizer_3/spiece.model filter=lfs diff=lfs merge=lfs -text
6
  transformer/model.onnx_data filter=lfs diff=lfs merge=lfs -text
7
- mmdit-x.png filter=lfs diff=lfs merge=lfs -text
8
- sd3.5_medium_demo.jpg filter=lfs diff=lfs merge=lfs -text
 
4
  text_encoder_3/model.onnx_data filter=lfs diff=lfs merge=lfs -text
5
  tokenizer_3/spiece.model filter=lfs diff=lfs merge=lfs -text
6
  transformer/model.onnx_data filter=lfs diff=lfs merge=lfs -text
 
 
text_encoder/config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "_attn_implementation_autoset": true,
3
- "_name_or_path": "/home/azureuser/.cache/huggingface/hub/models--stabilityai--stable-diffusion-3.5-medium/snapshots/b940f670f0eda2d07fbb75229e779da1ad11eb80/text_encoder",
4
  "architectures": [
5
  "CLIPTextModelWithProjection"
6
  ],
@@ -22,7 +21,7 @@
22
  "output_hidden_states": true,
23
  "pad_token_id": 1,
24
  "projection_dim": 768,
25
- "torch_dtype": "float16",
26
- "transformers_version": "4.48.3",
27
  "vocab_size": 49408
28
  }
 
1
  {
2
  "_attn_implementation_autoset": true,
 
3
  "architectures": [
4
  "CLIPTextModelWithProjection"
5
  ],
 
21
  "output_hidden_states": true,
22
  "pad_token_id": 1,
23
  "projection_dim": 768,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.51.3",
26
  "vocab_size": 49408
27
  }
text_encoder/model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40ba7d326db174d13571139f6fab5ad901ee80a5f4d00ea6cb49517e05cd4f2c
3
- size 494947228
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f50c95b5b7cffa9e9a666c6af99ab8dce8c3df9b2a35449a888a528b0a74adb
3
+ size 494839363
text_encoder_2/config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "_attn_implementation_autoset": true,
3
- "_name_or_path": "/home/azureuser/.cache/huggingface/hub/models--stabilityai--stable-diffusion-3.5-medium/snapshots/b940f670f0eda2d07fbb75229e779da1ad11eb80/text_encoder_2",
4
  "architectures": [
5
  "CLIPTextModelWithProjection"
6
  ],
@@ -22,7 +21,7 @@
22
  "output_hidden_states": true,
23
  "pad_token_id": 1,
24
  "projection_dim": 1280,
25
- "torch_dtype": "float16",
26
- "transformers_version": "4.48.3",
27
  "vocab_size": 49408
28
  }
 
1
  {
2
  "_attn_implementation_autoset": true,
 
3
  "architectures": [
4
  "CLIPTextModelWithProjection"
5
  ],
 
21
  "output_hidden_states": true,
22
  "pad_token_id": 1,
23
  "projection_dim": 1280,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.51.3",
26
  "vocab_size": 49408
27
  }
text_encoder_2/model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1237d7e82e00c7d8455a42beee4fa994eb9db30e27520647fedfb82739bed1b1
3
- size 1042490
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f0d634eb6d5ad375747d53dfd2f361f406adec78b3074b031be9cfbc0fba49e
3
+ size 723680
text_encoder_3/config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "_attn_implementation_autoset": true,
3
- "_name_or_path": "/home/azureuser/.cache/huggingface/hub/models--stabilityai--stable-diffusion-3.5-medium/snapshots/b940f670f0eda2d07fbb75229e779da1ad11eb80/text_encoder_3",
4
  "architectures": [
5
  "T5EncoderModel"
6
  ],
@@ -27,8 +26,8 @@
27
  "relative_attention_max_distance": 128,
28
  "relative_attention_num_buckets": 32,
29
  "tie_word_embeddings": false,
30
- "torch_dtype": "float16",
31
- "transformers_version": "4.48.3",
32
  "use_cache": true,
33
  "vocab_size": 32128
34
  }
 
1
  {
2
  "_attn_implementation_autoset": true,
 
3
  "architectures": [
4
  "T5EncoderModel"
5
  ],
 
26
  "relative_attention_max_distance": 128,
27
  "relative_attention_num_buckets": 32,
28
  "tie_word_embeddings": false,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.51.3",
31
  "use_cache": true,
32
  "vocab_size": 32128
33
  }
transformer/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_class_name": "SD3Transformer2DModel",
3
  "_diffusers_version": "0.33.1",
4
- "_name_or_path": "/home/azureuser/.cache/huggingface/hub/models--stabilityai--stable-diffusion-3.5-medium/snapshots/b940f670f0eda2d07fbb75229e779da1ad11eb80/transformer",
5
  "attention_head_dim": 64,
6
  "caption_projection_dim": 1536,
7
  "dual_attention_layers": [
 
1
  {
2
  "_class_name": "SD3Transformer2DModel",
3
  "_diffusers_version": "0.33.1",
4
+ "_name_or_path": "/home/waheedbrown/.cache/huggingface/hub/models--stabilityai--stable-diffusion-3.5-medium/snapshots/b940f670f0eda2d07fbb75229e779da1ad11eb80/transformer",
5
  "attention_head_dim": 64,
6
  "caption_projection_dim": 1536,
7
  "dual_attention_layers": [
vae_decoder/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_class_name": "AutoencoderKL",
3
  "_diffusers_version": "0.33.1",
4
- "_name_or_path": "/home/azureuser/.cache/huggingface/hub/models--stabilityai--stable-diffusion-3.5-medium/snapshots/b940f670f0eda2d07fbb75229e779da1ad11eb80/vae",
5
  "act_fn": "silu",
6
  "block_out_channels": [
7
  128,
 
1
  {
2
  "_class_name": "AutoencoderKL",
3
  "_diffusers_version": "0.33.1",
4
+ "_name_or_path": "/home/waheedbrown/.cache/huggingface/hub/models--stabilityai--stable-diffusion-3.5-medium/snapshots/b940f670f0eda2d07fbb75229e779da1ad11eb80/vae",
5
  "act_fn": "silu",
6
  "block_out_channels": [
7
  128,
vae_encoder/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_class_name": "AutoencoderKL",
3
  "_diffusers_version": "0.33.1",
4
- "_name_or_path": "/home/azureuser/.cache/huggingface/hub/models--stabilityai--stable-diffusion-3.5-medium/snapshots/b940f670f0eda2d07fbb75229e779da1ad11eb80/vae",
5
  "act_fn": "silu",
6
  "block_out_channels": [
7
  128,
 
1
  {
2
  "_class_name": "AutoencoderKL",
3
  "_diffusers_version": "0.33.1",
4
+ "_name_or_path": "/home/waheedbrown/.cache/huggingface/hub/models--stabilityai--stable-diffusion-3.5-medium/snapshots/b940f670f0eda2d07fbb75229e779da1ad11eb80/vae",
5
  "act_fn": "silu",
6
  "block_out_channels": [
7
  128,