EQX55 committed on
Commit
325a569
·
verified ·
1 Parent(s): b8c8e48

Upload 66 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +11 -0
  2. ZIT/.cache/huggingface/.gitignore +1 -0
  3. ZIT/.cache/huggingface/download/.gitattributes.metadata +3 -0
  4. ZIT/.cache/huggingface/download/README.md.metadata +3 -0
  5. ZIT/.cache/huggingface/download/assets/DMDR.webp.metadata +3 -0
  6. ZIT/.cache/huggingface/download/assets/Z-Image-Gallery.pdf.metadata +3 -0
  7. ZIT/.cache/huggingface/download/assets/architecture.webp.metadata +3 -0
  8. ZIT/.cache/huggingface/download/assets/decoupled-dmd.webp.metadata +3 -0
  9. ZIT/.cache/huggingface/download/assets/leaderboard.png.metadata +3 -0
  10. ZIT/.cache/huggingface/download/assets/leaderboard.webp.metadata +3 -0
  11. ZIT/.cache/huggingface/download/assets/reasoning.png.metadata +3 -0
  12. ZIT/.cache/huggingface/download/assets/showcase.jpg.metadata +3 -0
  13. ZIT/.cache/huggingface/download/assets/showcase_editing.png.metadata +3 -0
  14. ZIT/.cache/huggingface/download/assets/showcase_realistic.png.metadata +3 -0
  15. ZIT/.cache/huggingface/download/assets/showcase_rendering.png.metadata +3 -0
  16. ZIT/.cache/huggingface/download/model_index.json.metadata +3 -0
  17. ZIT/.cache/huggingface/download/scheduler/scheduler_config.json.metadata +3 -0
  18. ZIT/.cache/huggingface/download/text_encoder/config.json.metadata +3 -0
  19. ZIT/.cache/huggingface/download/text_encoder/generation_config.json.metadata +3 -0
  20. ZIT/.cache/huggingface/download/text_encoder/model-00001-of-00003.safetensors.metadata +3 -0
  21. ZIT/.cache/huggingface/download/text_encoder/model-00002-of-00003.safetensors.metadata +3 -0
  22. ZIT/.cache/huggingface/download/text_encoder/model-00003-of-00003.safetensors.metadata +3 -0
  23. ZIT/.cache/huggingface/download/text_encoder/model.safetensors.index.json.metadata +3 -0
  24. ZIT/.cache/huggingface/download/tokenizer/merges.txt.metadata +3 -0
  25. ZIT/.cache/huggingface/download/tokenizer/tokenizer.json.metadata +3 -0
  26. ZIT/.cache/huggingface/download/tokenizer/tokenizer_config.json.metadata +3 -0
  27. ZIT/.cache/huggingface/download/tokenizer/vocab.json.metadata +3 -0
  28. ZIT/.cache/huggingface/download/transformer/config.json.metadata +3 -0
  29. ZIT/.cache/huggingface/download/transformer/diffusion_pytorch_model-00001-of-00003.safetensors.metadata +3 -0
  30. ZIT/.cache/huggingface/download/transformer/diffusion_pytorch_model-00002-of-00003.safetensors.metadata +3 -0
  31. ZIT/.cache/huggingface/download/transformer/diffusion_pytorch_model-00003-of-00003.safetensors.metadata +3 -0
  32. ZIT/.cache/huggingface/download/transformer/diffusion_pytorch_model.safetensors.index.json.metadata +3 -0
  33. ZIT/.cache/huggingface/download/vae/config.json.metadata +3 -0
  34. ZIT/.cache/huggingface/download/vae/diffusion_pytorch_model.safetensors.metadata +3 -0
  35. ZIT/.gitattributes +46 -0
  36. ZIT/README.md +197 -0
  37. ZIT/assets/DMDR.webp +3 -0
  38. ZIT/assets/Z-Image-Gallery.pdf +3 -0
  39. ZIT/assets/architecture.webp +3 -0
  40. ZIT/assets/decoupled-dmd.webp +3 -0
  41. ZIT/assets/leaderboard.png +3 -0
  42. ZIT/assets/leaderboard.webp +0 -0
  43. ZIT/assets/reasoning.png +3 -0
  44. ZIT/assets/showcase.jpg +3 -0
  45. ZIT/assets/showcase_editing.png +3 -0
  46. ZIT/assets/showcase_realistic.png +3 -0
  47. ZIT/assets/showcase_rendering.png +3 -0
  48. ZIT/model_index.json +24 -0
  49. ZIT/scheduler/scheduler_config.json +7 -0
  50. ZIT/text_encoder/config.json +30 -0
.gitattributes CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ZIT/assets/architecture.webp filter=lfs diff=lfs merge=lfs -text
37
+ ZIT/assets/decoupled-dmd.webp filter=lfs diff=lfs merge=lfs -text
38
+ ZIT/assets/DMDR.webp filter=lfs diff=lfs merge=lfs -text
39
+ ZIT/assets/leaderboard.png filter=lfs diff=lfs merge=lfs -text
40
+ ZIT/assets/reasoning.png filter=lfs diff=lfs merge=lfs -text
41
+ ZIT/assets/showcase_editing.png filter=lfs diff=lfs merge=lfs -text
42
+ ZIT/assets/showcase_realistic.png filter=lfs diff=lfs merge=lfs -text
43
+ ZIT/assets/showcase_rendering.png filter=lfs diff=lfs merge=lfs -text
44
+ ZIT/assets/showcase.jpg filter=lfs diff=lfs merge=lfs -text
45
+ ZIT/assets/Z-Image-Gallery.pdf filter=lfs diff=lfs merge=lfs -text
46
+ ZIT/tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
ZIT/.cache/huggingface/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *
ZIT/.cache/huggingface/download/.gitattributes.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ c88e3c116b124ca91c94af1ac21d6d0db7deaa58
3
+ 1765723842.87288
ZIT/.cache/huggingface/download/README.md.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ c1f167cae8d334b21e9291b7d040bdd365a310a9
3
+ 1765723843.0835247
ZIT/.cache/huggingface/download/assets/DMDR.webp.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 2e6f3053b98d097f2aa11d3892bd9307326db41b65336bea54dc5825a0e03077
3
+ 1765723843.6776736
ZIT/.cache/huggingface/download/assets/Z-Image-Gallery.pdf.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 6f9895b3246d2547bac74bbe0be975da500eaae93f2cad4248ad3281786b1ac6
3
+ 1765723843.7332432
ZIT/.cache/huggingface/download/assets/architecture.webp.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 261af62ecc7e9749ae28e1d3a84e2f70a6c192d2017b7d8f020c7bff982ef59c
3
+ 1765723843.6776736
ZIT/.cache/huggingface/download/assets/decoupled-dmd.webp.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 4568ca559b997fc38f57dc1c3f5b1da3a3c144ae12419caa855ced972bf8c7aa
3
+ 1765723843.5603287
ZIT/.cache/huggingface/download/assets/leaderboard.png.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ e9fd4aa185bb7bff2b5515f2001b4d80df330595e78d6a098142e5a232bb4e4e
3
+ 1765723843.5428705
ZIT/.cache/huggingface/download/assets/leaderboard.webp.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 67cbfa6780584dce044144fb86afe495e3147abd
3
+ 1765723842.9801419
ZIT/.cache/huggingface/download/assets/reasoning.png.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 96c16b2c8d8dc67bb92ecc22d54b9955ab55136977f515bb76f4b2eb42eb3cdb
3
+ 1765723843.8256502
ZIT/.cache/huggingface/download/assets/showcase.jpg.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ f6ee74e066e00596e429f5a08140aebae1678e5935ce1e11ca6c1c6cd72432ee
3
+ 1765723843.7191947
ZIT/.cache/huggingface/download/assets/showcase_editing.png.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 7d720c3157fd0b0c1f07ac826c6d380b4bcb1b6933c64eb11bfe804ccf7c26f4
3
+ 1765723843.9885721
ZIT/.cache/huggingface/download/assets/showcase_realistic.png.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 697e6f6857f619314173508df72a14314cbb43e67475de7494123bb8b4f4eb2c
3
+ 1765723844.1912699
ZIT/.cache/huggingface/download/assets/showcase_rendering.png.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 3556dd66be2200d53f957424e12ecf914ddf3eded151cde86c7353f8b231284f
3
+ 1765723844.0754724
ZIT/.cache/huggingface/download/model_index.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 570c63a7c5580f63d0a8ed622324f50015dcf914
3
+ 1765723844.1922703
ZIT/.cache/huggingface/download/scheduler/scheduler_config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 2349bc43351afa730341d07dd44fce4ace1c4257
3
+ 1765723844.1681695
ZIT/.cache/huggingface/download/text_encoder/config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ e49eccdc32f36da9c09cfa0e737084f9e0105e5e
3
+ 1765723844.851518
ZIT/.cache/huggingface/download/text_encoder/generation_config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 20a8a9156fc8c3f25295ca067f61fdf120d517c5
3
+ 1765723844.1373098
ZIT/.cache/huggingface/download/text_encoder/model-00001-of-00003.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 328a91d3122359d5547f9d79521205bc0a46e1f79a792dfe650e99fc2d651223
3
+ 1765723954.0234396
ZIT/.cache/huggingface/download/text_encoder/model-00002-of-00003.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 6cd087b316306a68c562436b5492edbcf6e16c6dba3a1308279caa5a58e21ca5
3
+ 1765723942.6103716
ZIT/.cache/huggingface/download/text_encoder/model-00003-of-00003.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 7ca841ee75b9c61267c0c6148fd8d096d3d21b6d3e161256a9b878154f91fc52
3
+ 1765723857.2813468
ZIT/.cache/huggingface/download/text_encoder/model.safetensors.index.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 95c0a0059df040d75dc6c396b174382cf61d2f91
3
+ 1765723844.8750381
ZIT/.cache/huggingface/download/tokenizer/merges.txt.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 31349551d90c7606f325fe0f11bbb8bd5fa0d7c7
3
+ 1765723845.914382
ZIT/.cache/huggingface/download/tokenizer/tokenizer.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ 1765723848.264321
ZIT/.cache/huggingface/download/tokenizer/tokenizer_config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 417d038a63fa3de29cfde265caedae14d1a58d92
3
+ 1765723844.6357412
ZIT/.cache/huggingface/download/tokenizer/vocab.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 4783fe10ac3adce15ac8f358ef5462739852c569
3
+ 1765723847.7064893
ZIT/.cache/huggingface/download/transformer/config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ c579190a5e03d602a2fd9647221c9d0d9441f150
3
+ 1765723845.354094
ZIT/.cache/huggingface/download/transformer/diffusion_pytorch_model-00001-of-00003.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 95facd593e2549e8252acb571c653d57f7ddb7f1060d4e81712f152555a88804
3
+ 1765724011.9914029
ZIT/.cache/huggingface/download/transformer/diffusion_pytorch_model-00002-of-00003.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ a4bbe43ee184a1fb5af4b412d27555f532893bdc3165b1149e304ed82b5d7015
3
+ 1765724012.0843074
ZIT/.cache/huggingface/download/transformer/diffusion_pytorch_model-00003-of-00003.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ aba4e37a590e63210878160a718d916d80398f4e1f78ab6c9b2b2a00d92769fa
3
+ 1765723986.1461906
ZIT/.cache/huggingface/download/transformer/diffusion_pytorch_model.safetensors.index.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ a3cadc1ed28e334ef58d4e6bbda107e25168ef47
3
+ 1765723848.283522
ZIT/.cache/huggingface/download/vae/config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ 894fa30ac1950cef422189d2d4cef11043c62875
3
+ 1765723848.7703586
ZIT/.cache/huggingface/download/vae/diffusion_pytorch_model.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 5f4b9cbb80cc95ba44fe6667dfd75710f7db2947
2
+ f5b59a26851551b67ae1fe58d32e76486e1e812def4696a4bea97f16604d40a3
3
+ 1765723896.6316795
ZIT/.gitattributes ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ assets/DMDR.webp filter=lfs diff=lfs merge=lfs -text
38
+ assets/architecture.webp filter=lfs diff=lfs merge=lfs -text
39
+ assets/decoupled-dmd.webp filter=lfs diff=lfs merge=lfs -text
40
+ assets/reasoning.png filter=lfs diff=lfs merge=lfs -text
41
+ assets/showcase.jpg filter=lfs diff=lfs merge=lfs -text
42
+ assets/showcase_editing.png filter=lfs diff=lfs merge=lfs -text
43
+ assets/showcase_realistic.png filter=lfs diff=lfs merge=lfs -text
44
+ assets/showcase_rendering.png filter=lfs diff=lfs merge=lfs -text
45
+ assets/Z-Image-Gallery.pdf filter=lfs diff=lfs merge=lfs -text
46
+ assets/leaderboard.png filter=lfs diff=lfs merge=lfs -text
ZIT/README.md ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ pipeline_tag: text-to-image
6
+ library_name: diffusers
7
+ ---
8
+
9
+
10
+ <h1 align="center">⚡️- Image<br><sub><sup>An Efficient Image Generation Foundation Model with Single-Stream Diffusion Transformer</sup></sub></h1>
11
+
12
+ <div align="center">
13
+
14
+ [![Official Site](https://img.shields.io/badge/Official%20Site-333399.svg?logo=homepage)](https://tongyi-mai.github.io/Z-Image-blog/)&#160;
15
+ [![GitHub](https://img.shields.io/badge/GitHub-Z--Image-181717?logo=github&logoColor=white)](https://github.com/Tongyi-MAI/Z-Image)&#160;
16
+ [![Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Checkpoint-Z--Image--Turbo-yellow)](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo)&#160;
17
+ [![Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Online_Demo-Z--Image--Turbo-blue)](https://huggingface.co/spaces/Tongyi-MAI/Z-Image-Turbo)&#160;
18
+ [![Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Mobile_Demo-Z--Image--Turbo-red)](https://huggingface.co/spaces/akhaliq/Z-Image-Turbo)&#160;
19
+ [![ModelScope Model](https://img.shields.io/badge/🤖%20Checkpoint-Z--Image--Turbo-624aff)](https://www.modelscope.cn/models/Tongyi-MAI/Z-Image-Turbo)&#160;
20
+ [![ModelScope Space](https://img.shields.io/badge/🤖%20Online_Demo-Z--Image--Turbo-17c7a7)](https://www.modelscope.cn/aigc/imageGeneration?tab=advanced&versionId=469191&modelType=Checkpoint&sdVersion=Z_IMAGE_TURBO&modelUrl=modelscope%253A%252F%252FTongyi-MAI%252FZ-Image-Turbo%253Frevision%253Dmaster%7D%7BOnline)&#160;
21
+ [![Art Gallery PDF](https://img.shields.io/badge/%F0%9F%96%BC%20Art_Gallery-PDF-ff69b4)](assets/Z-Image-Gallery.pdf)&#160;
22
+ [![Web Art Gallery](https://img.shields.io/badge/%F0%9F%8C%90%20Web_Art_Gallery-online-00bfff)](https://modelscope.cn/studios/Tongyi-MAI/Z-Image-Gallery/summary)&#160;
23
+ <a href="https://arxiv.org/abs/2511.22699" target="_blank"><img src="https://img.shields.io/badge/Report-b5212f.svg?logo=arxiv" height="21px"></a>
24
+
25
+
26
+ Welcome to the official repository for the Z-Image(造相)project!
27
+
28
+ </div>
29
+
30
+
31
+
32
+ ## ✨ Z-Image
33
+
34
+ Z-Image is a powerful and highly efficient image generation model with **6B** parameters. Currently there are three variants:
35
+
36
+ - 🚀 **Z-Image-Turbo** – A distilled version of Z-Image that matches or exceeds leading competitors with only **8 NFEs** (Number of Function Evaluations). It offers **⚡️sub-second inference latency⚡️** on enterprise-grade H800 GPUs and fits comfortably within **16G VRAM consumer devices**. It excels in photorealistic image generation, bilingual text rendering (English & Chinese), and robust instruction adherence.
37
+
38
+ - 🧱 **Z-Image-Base** – The non-distilled foundation model. By releasing this checkpoint, we aim to unlock the full potential for community-driven fine-tuning and custom development.
39
+
40
+ - ✍️ **Z-Image-Edit** – A variant fine-tuned on Z-Image specifically for image editing tasks. It supports creative image-to-image generation with impressive instruction-following capabilities, allowing for precise edits based on natural language prompts.
41
+
42
+ ### 📥 Model Zoo
43
+
44
+ | Model | Hugging Face | ModelScope |
45
+ | :--- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
46
+ | **Z-Image-Turbo** | [![Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Checkpoint%20-Z--Image--Turbo-yellow)](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) <br> [![Hugging Face Space](https://img.shields.io/badge/%F0%9F%A4%97%20Online%20Demo-Z--Image--Turbo-blue)](https://huggingface.co/spaces/Tongyi-MAI/Z-Image-Turbo) | [![ModelScope Model](https://img.shields.io/badge/🤖%20%20Checkpoint-Z--Image--Turbo-624aff)](https://www.modelscope.cn/models/Tongyi-MAI/Z-Image-Turbo) <br> [![ModelScope Space](https://img.shields.io/badge/%F0%9F%A4%96%20Online%20Demo-Z--Image--Turbo-17c7a7)](https://www.modelscope.cn/aigc/imageGeneration?tab=advanced&versionId=469191&modelType=Checkpoint&sdVersion=Z_IMAGE_TURBO&modelUrl=modelscope%3A%2F%2FTongyi-MAI%2FZ-Image-Turbo%3Frevision%3Dmaster) |
47
+ | **Z-Image-Base** | *To be released* | *To be released* |
48
+ | **Z-Image-Edit** | *To be released* | *To be released* |
49
+
50
+ ### 🖼️ Showcase
51
+
52
+ 📸 **Photorealistic Quality**: **Z-Image-Turbo** delivers strong photorealistic image generation while maintaining excellent aesthetic quality.
53
+
54
+ ![Showcase of Z-Image on Photo-realistic image Generation](assets/showcase_realistic.png)
55
+
56
+ 📖 **Accurate Bilingual Text Rendering**: **Z-Image-Turbo** excels at accurately rendering complex Chinese and English text.
57
+
58
+ ![Showcase of Z-Image on Bilingual Text Rendering](assets/showcase_rendering.png)
59
+
60
+ 💡 **Prompt Enhancing & Reasoning**: Prompt Enhancer empowers the model with reasoning capabilities, enabling it to transcend surface-level descriptions and tap into underlying world knowledge.
61
+
62
+ ![Showcase of Z-Image on Prompt Enhancing and Reasoning](assets/reasoning.png)
63
+
64
+ 🧠 **Creative Image Editing**: **Z-Image-Edit** shows a strong understanding of bilingual editing instructions, enabling imaginative and flexible image transformations.
65
+
66
+ ![Showcase of Z-Image-Edit on Image Editing](assets/showcase_editing.png)
67
+
68
+ ### 🏗️ Model Architecture
69
+ We adopt a **Scalable Single-Stream DiT** (S3-DiT) architecture. In this setup, text, visual semantic tokens, and image VAE tokens are concatenated at the sequence level to serve as a unified input stream, maximizing parameter efficiency compared to dual-stream approaches.
70
+
71
+ ![Architecture of Z-Image and Z-Image-Edit](assets/architecture.webp)
72
+
73
+ ### 📈 Performance
74
+ According to the Elo-based Human Preference Evaluation (on [*Alibaba AI Arena*](https://aiarena.alibaba-inc.com/corpora/arena/leaderboard?arenaType=T2I)), Z-Image-Turbo shows highly competitive performance against other leading models, while achieving state-of-the-art results among open-source models.
75
+
76
+ <p align="center">
77
+ <a href="https://aiarena.alibaba-inc.com/corpora/arena/leaderboard?arenaType=T2I">
78
+ <img src="assets/leaderboard.png" alt="Z-Image Elo Rating on AI Arena"/><br />
79
+ <span style="font-size:1.05em; cursor:pointer; text-decoration:underline;"> Click to view the full leaderboard</span>
80
+ </a>
81
+ </p>
82
+
83
+ ### 🚀 Quick Start
84
+ To install the latest version of diffusers, use the following command:
85
+ <details>
86
+ <summary><sup>Click here for details on why you need to install diffusers from source</sup></summary>
87
+
88
+ We have submitted two pull requests ([#12703](https://github.com/huggingface/diffusers/pull/12703) and [#12715](https://github.com/huggingface/diffusers/pull/12715)) to the 🤗 diffusers repository to add support for Z-Image. Both PRs have been merged into the latest official diffusers release.
89
+ Therefore, you need to install diffusers from source for the latest features and Z-Image support.
90
+
91
+ </details>
92
+
93
+ ```bash
94
+ pip install git+https://github.com/huggingface/diffusers
95
+ ```
96
+
97
+ ```python
98
+ import torch
99
+ from diffusers import ZImagePipeline
100
+
101
+ # 1. Load the pipeline
102
+ # Use bfloat16 for optimal performance on supported GPUs
103
+ pipe = ZImagePipeline.from_pretrained(
104
+ "Tongyi-MAI/Z-Image-Turbo",
105
+ torch_dtype=torch.bfloat16,
106
+ low_cpu_mem_usage=False,
107
+ )
108
+ pipe.to("cuda")
109
+
110
+ # [Optional] Attention Backend
111
+ # Diffusers uses SDPA by default. Switch to Flash Attention for better efficiency if supported:
112
+ # pipe.transformer.set_attention_backend("flash") # Enable Flash-Attention-2
113
+ # pipe.transformer.set_attention_backend("_flash_3") # Enable Flash-Attention-3
114
+
115
+ # [Optional] Model Compilation
116
+ # Compiling the DiT model accelerates inference, but the first run will take longer to compile.
117
+ # pipe.transformer.compile()
118
+
119
+ # [Optional] CPU Offloading
120
+ # Enable CPU offloading for memory-constrained devices.
121
+ # pipe.enable_model_cpu_offload()
122
+
123
+ prompt = "Young Chinese woman in red Hanfu, intricate embroidery. Impeccable makeup, red floral forehead pattern. Elaborate high bun, golden phoenix headdress, red flowers, beads. Holds round folding fan with lady, trees, bird. Neon lightning-bolt lamp (⚡️), bright yellow glow, above extended left palm. Soft-lit outdoor night background, silhouetted tiered pagoda (西安大雁塔), blurred colorful distant lights."
124
+
125
+ # 2. Generate Image
126
+ image = pipe(
127
+ prompt=prompt,
128
+ height=1024,
129
+ width=1024,
130
+ num_inference_steps=9, # This actually results in 8 DiT forwards
131
+ guidance_scale=0.0, # Guidance should be 0 for the Turbo models
132
+ generator=torch.Generator("cuda").manual_seed(42),
133
+ ).images[0]
134
+
135
+ image.save("example.png")
136
+ ```
137
+
138
+ ## 🔬 Decoupled-DMD: The Acceleration Magic Behind Z-Image
139
+
140
+ [![arXiv](https://img.shields.io/badge/arXiv-2511.22677-b31b1b.svg)](https://arxiv.org/abs/2511.22677)
141
+
142
+ Decoupled-DMD is the core few-step distillation algorithm that empowers the 8-step Z-Image model.
143
+
144
+ Our core insight in Decoupled-DMD is that the success of existing DMD (Distribution Matching Distillation) methods is the result of two independent, collaborating mechanisms:
145
+
146
+ - **CFG Augmentation (CA)**: The primary **engine** 🚀 driving the distillation process, a factor largely overlooked in previous work.
147
+ - **Distribution Matching (DM)**: Acts more as a **regularizer** ⚖️, ensuring the stability and quality of the generated output.
148
+
149
+ By recognizing and decoupling these two mechanisms, we were able to study and optimize them in isolation. This ultimately motivated us to develop an improved distillation process that significantly enhances the performance of few-step generation.
150
+
151
+ ![Diagram of Decoupled-DMD](assets/decoupled-dmd.webp)
152
+
153
+ ## 🤖 DMDR: Fusing DMD with Reinforcement Learning
154
+
155
+ [![arXiv](https://img.shields.io/badge/arXiv-2511.13649-b31b1b.svg)](https://arxiv.org/abs/2511.13649)
156
+
157
+ Building upon the strong foundation of Decoupled-DMD, our 8-step Z-Image model has already demonstrated exceptional capabilities. To achieve further improvements in terms of semantic alignment, aesthetic quality, and structural coherence—while producing images with richer high-frequency details—we present **DMDR**.
158
+
159
+ Our core insight behind DMDR is that Reinforcement Learning (RL) and Distribution Matching Distillation (DMD) can be synergistically integrated during the post-training of few-step models. We demonstrate that:
160
+
161
+ - **RL Unlocks the Performance of DMD** 🚀
162
+ - **DMD Effectively Regularizes RL** ⚖️
163
+
164
+ ![Diagram of DMDR](assets/DMDR.webp)
165
+
166
+ ## ⏬ Download
167
+ ```bash
168
+ pip install -U huggingface_hub
169
+ HF_XET_HIGH_PERFORMANCE=1 hf download Tongyi-MAI/Z-Image-Turbo
170
+ ```
171
+
172
+ ## 📜 Citation
173
+
174
+ If you find our work useful in your research, please consider citing:
175
+
176
+ ```bibtex
177
+ @article{team2025zimage,
178
+ title={Z-Image: An Efficient Image Generation Foundation Model with Single-Stream Diffusion Transformer},
179
+ author={Z-Image Team},
180
+ journal={arXiv preprint arXiv:2511.22699},
181
+ year={2025}
182
+ }
183
+
184
+ @article{liu2025decoupled,
185
+ title={Decoupled DMD: CFG Augmentation as the Spear, Distribution Matching as the Shield},
186
+ author={Dongyang Liu and Peng Gao and David Liu and Ruoyi Du and Zhen Li and Qilong Wu and Xin Jin and Sihan Cao and Shifeng Zhang and Hongsheng Li and Steven Hoi},
187
+ journal={arXiv preprint arXiv:2511.22677},
188
+ year={2025}
189
+ }
190
+
191
+ @article{jiang2025distribution,
192
+ title={Distribution Matching Distillation Meets Reinforcement Learning},
193
+ author={Jiang, Dengyang and Liu, Dongyang and Wang, Zanyi and Wu, Qilong and Jin, Xin and Liu, David and Li, Zhen and Wang, Mengmeng and Gao, Peng and Yang, Harry},
194
+ journal={arXiv preprint arXiv:2511.13649},
195
+ year={2025}
196
+ }
197
+ ```
ZIT/assets/DMDR.webp ADDED

Git LFS Details

  • SHA256: 2e6f3053b98d097f2aa11d3892bd9307326db41b65336bea54dc5825a0e03077
  • Pointer size: 131 Bytes
  • Size of remote file: 173 kB
ZIT/assets/Z-Image-Gallery.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f9895b3246d2547bac74bbe0be975da500eaae93f2cad4248ad3281786b1ac6
3
+ size 15767436
ZIT/assets/architecture.webp ADDED

Git LFS Details

  • SHA256: 261af62ecc7e9749ae28e1d3a84e2f70a6c192d2017b7d8f020c7bff982ef59c
  • Pointer size: 131 Bytes
  • Size of remote file: 422 kB
ZIT/assets/decoupled-dmd.webp ADDED

Git LFS Details

  • SHA256: 4568ca559b997fc38f57dc1c3f5b1da3a3c144ae12419caa855ced972bf8c7aa
  • Pointer size: 131 Bytes
  • Size of remote file: 152 kB
ZIT/assets/leaderboard.png ADDED

Git LFS Details

  • SHA256: e9fd4aa185bb7bff2b5515f2001b4d80df330595e78d6a098142e5a232bb4e4e
  • Pointer size: 132 Bytes
  • Size of remote file: 2.03 MB
ZIT/assets/leaderboard.webp ADDED
ZIT/assets/reasoning.png ADDED

Git LFS Details

  • SHA256: 96c16b2c8d8dc67bb92ecc22d54b9955ab55136977f515bb76f4b2eb42eb3cdb
  • Pointer size: 132 Bytes
  • Size of remote file: 7.7 MB
ZIT/assets/showcase.jpg ADDED

Git LFS Details

  • SHA256: f6ee74e066e00596e429f5a08140aebae1678e5935ce1e11ca6c1c6cd72432ee
  • Pointer size: 132 Bytes
  • Size of remote file: 6.43 MB
ZIT/assets/showcase_editing.png ADDED

Git LFS Details

  • SHA256: 7d720c3157fd0b0c1f07ac826c6d380b4bcb1b6933c64eb11bfe804ccf7c26f4
  • Pointer size: 132 Bytes
  • Size of remote file: 4.75 MB
ZIT/assets/showcase_realistic.png ADDED

Git LFS Details

  • SHA256: 697e6f6857f619314173508df72a14314cbb43e67475de7494123bb8b4f4eb2c
  • Pointer size: 132 Bytes
  • Size of remote file: 6.26 MB
ZIT/assets/showcase_rendering.png ADDED

Git LFS Details

  • SHA256: 3556dd66be2200d53f957424e12ecf914ddf3eded151cde86c7353f8b231284f
  • Pointer size: 132 Bytes
  • Size of remote file: 7.6 MB
ZIT/model_index.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "ZImagePipeline",
3
+ "_diffusers_version": "0.36.0.dev0",
4
+ "scheduler": [
5
+ "diffusers",
6
+ "FlowMatchEulerDiscreteScheduler"
7
+ ],
8
+ "text_encoder": [
9
+ "transformers",
10
+ "Qwen3Model"
11
+ ],
12
+ "tokenizer": [
13
+ "transformers",
14
+ "Qwen2Tokenizer"
15
+ ],
16
+ "transformer": [
17
+ "diffusers",
18
+ "ZImageTransformer2DModel"
19
+ ],
20
+ "vae": [
21
+ "diffusers",
22
+ "AutoencoderKL"
23
+ ]
24
+ }
ZIT/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "FlowMatchEulerDiscreteScheduler",
3
+ "_diffusers_version": "0.36.0.dev0",
4
+ "num_train_timesteps": 1000,
5
+ "use_dynamic_shifting": false,
6
+ "shift": 3.0
7
+ }
ZIT/text_encoder/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2560,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 9728,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "qwen3",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 36,
19
+ "num_key_value_heads": 8,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": true,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936
30
+ }