rahul7star commited on
Commit
ddbd318
·
verified ·
1 Parent(s): 1047d77

Add README.md from tiny-random/z-image

Browse files
Files changed (1) hide show
  1. README.md +144 -0
README.md ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: Diffusers
3
+ pipeline_tag: text-to-image
4
+ inference: true
5
+ base_model:
6
+ - Tongyi-MAI/Z-Image-Turbo
7
+ ---
8
+
9
+ This tiny model is for debugging. It is randomly initialized with the config adapted from [Tongyi-MAI/Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo).
10
+
11
+ File size:
12
+ - 2.4MB text_encoder/model.safetensors
13
+ - 1.4MB transformer/diffusion_pytorch_model.safetensors
14
+ - 0.5MB vae/diffusion_pytorch_model.safetensors
15
+
16
+ ### Example usage:
17
+
18
+ ```python
19
+ import torch
20
+ from diffusers import ZImagePipeline
21
+
22
+ model_id = "tiny-random/z-image"
23
+ torch_dtype = torch.bfloat16
24
+ device = "cuda"
25
+ pipe = ZImagePipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
26
+ pipe = pipe.to(device)
27
+
28
+ prompt = "Flowers and trees"
29
+ image = pipe(
30
+ prompt=prompt,
31
+ height=1024,
32
+ width=1024,
33
+ num_inference_steps=9, # This actually results in 8 DiT forwards
34
+ guidance_scale=0.0, # Guidance should be 0 for the Turbo models
35
+ generator=torch.Generator("cuda").manual_seed(42),
36
+ ).images[0]
37
+ print(image)
38
+ ```
39
+
40
+ ### Code used to create this repo:
41
+
42
+ ```python
43
+ import json
44
+
45
+ import torch
46
+ from diffusers import (
47
+ AutoencoderKL,
48
+ DiffusionPipeline,
49
+ FlowMatchEulerDiscreteScheduler,
50
+ ZImagePipeline,
51
+ ZImageTransformer2DModel,
52
+ )
53
+ from huggingface_hub import hf_hub_download
54
+ from transformers import AutoConfig, AutoTokenizer, Qwen2Tokenizer, Qwen3Model
55
+ from transformers.generation import GenerationConfig
56
+
57
+ source_model_id = "Tongyi-MAI/Z-Image-Turbo"
58
+ save_folder = "/tmp/tiny-random/z-image"
59
+
60
+ torch.set_default_dtype(torch.bfloat16)
61
+ scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
62
+ source_model_id, subfolder='scheduler')
63
+ tokenizer = AutoTokenizer.from_pretrained(
64
+ source_model_id, subfolder='tokenizer')
65
+
66
+ def save_json(path, obj):
67
+ import json
68
+ from pathlib import Path
69
+ Path(path).parent.mkdir(parents=True, exist_ok=True)
70
+ with open(path, 'w', encoding='utf-8') as f:
71
+ json.dump(obj, f, indent=2, ensure_ascii=False)
72
+
73
+ def init_weights(model):
74
+ import torch
75
+ torch.manual_seed(42)
76
+ with torch.no_grad():
77
+ for name, p in sorted(model.named_parameters()):
78
+ torch.nn.init.normal_(p, 0, 0.1)
79
+ print(name, p.shape, p.dtype, p.device)
80
+
81
+ with open(hf_hub_download(source_model_id, filename='text_encoder/config.json', repo_type='model'), 'r', encoding='utf-8') as f:
82
+ config = json.load(f)
83
+ config.update({
84
+ "head_dim": 32,
85
+ 'hidden_size': 8,
86
+ 'intermediate_size': 32,
87
+ 'max_window_layers': 1,
88
+ 'num_attention_heads': 8,
89
+ 'num_hidden_layers': 2,
90
+ 'num_key_value_heads': 4,
91
+ 'tie_word_embeddings': True,
92
+ })
93
+ save_json(f'{save_folder}/text_encoder/config.json', config)
94
+ text_encoder_config = AutoConfig.from_pretrained(
95
+ f'{save_folder}/text_encoder')
96
+ text_encoder = Qwen3Model(text_encoder_config).to(torch.bfloat16)
97
+ generation_config = GenerationConfig.from_pretrained(
98
+ source_model_id, subfolder='text_encoder')
99
+ text_encoder.generation_config = generation_config
100
+ init_weights(text_encoder)
101
+
102
+ with open(hf_hub_download(source_model_id, filename='transformer/config.json', repo_type='model'), 'r', encoding='utf-8') as f:
103
+ config = json.load(f)
104
+ config.update({
105
+ 'dim': 64,
106
+ 'axes_dims': [8, 8, 16],
107
+ 'n_heads': 2,
108
+ 'n_kv_heads': 4,
109
+ 'n_layers': 2,
110
+ 'cap_feat_dim': 8,
111
+ 'in_channels': 8,
112
+ })
113
+ save_json(f'{save_folder}/transformer/config.json', config)
114
+ transformer_config = ZImageTransformer2DModel.load_config(
115
+ f'{save_folder}/transformer')
116
+ transformer = ZImageTransformer2DModel.from_config(
117
+ transformer_config)
118
+ init_weights(transformer)
119
+
120
+ with open(hf_hub_download(source_model_id, filename='vae/config.json', repo_type='model'), 'r', encoding='utf-8') as f:
121
+ config = json.load(f)
122
+ config.update({
123
+ 'layers_per_block': 1,
124
+ 'block_out_channels': [32, 32],
125
+ 'latent_channels': 8,
126
+ 'down_block_types': ['DownEncoderBlock2D', 'DownEncoderBlock2D'],
127
+ 'up_block_types': ['UpDecoderBlock2D', 'UpDecoderBlock2D']
128
+ })
129
+ save_json(f'{save_folder}/vae/config.json', config)
130
+ vae_config = AutoencoderKL.load_config(f'{save_folder}/vae')
131
+ vae = AutoencoderKL.from_config(vae_config)
132
+ init_weights(vae)
133
+
134
+ pipeline = ZImagePipeline(
135
+ scheduler=scheduler,
136
+ text_encoder=text_encoder,
137
+ tokenizer=tokenizer,
138
+ transformer=transformer,
139
+ vae=vae,
140
+ )
141
+ pipeline = pipeline.to(torch.bfloat16)
142
+ pipeline.save_pretrained(save_folder, safe_serialization=True)
143
+ print(pipeline)
144
+ ```