{
  "pipeline_type": "ZImageMLXPipeline",
  "base_model": "Tongyi-MAI/Z-Image-Turbo",
  "framework": "mlx",
  "model": {
    "total_params": "10.26B",
    "text_encoder": {
      "type": "Qwen3",
      "params": "4.02B",
      "hidden_size": 2560,
      "num_layers": 36,
      "num_attention_heads": 32,
      "num_key_value_heads": 8,
      "dtype": "bfloat16"
    },
    "transformer": {
      "type": "ZImageTransformer (S3-DiT)",
      "params": "6.15B",
      "dim": 3840,
      "n_heads": 30,
      "head_dim": 128,
      "n_layers": 30,
      "n_refiner_layers": 2,
      "ffn_dim": 10240,
      "in_channels": 16,
      "patch_size": 2,
      "dtype": "bfloat16"
    },
    "vae": {
      "type": "AutoencoderKL Decoder",
      "params": "84M",
      "latent_channels": 16,
      "block_out_channels": [128, 256, 512, 512],
      "scaling_factor": 0.3611,
      "shift_factor": 0.1159,
      "dtype": "float32"
    },
    "scheduler": {
      "type": "FlowMatchEulerDiscrete",
      "shift": 3.0,
      "num_train_timesteps": 1000
    }
  },
  "quantization": {
    "supported_bits": [4, 8, 16],
    "default_bits": 16,
    "group_size": 64,
    "min_quantize_dim": 1024
  },
  "generation_defaults": {
    "width": 512,
    "height": 512,
    "num_steps": 8,
    "guidance_scale": 0.0,
    "max_text_len": 256
  }
}