# ByteDream / config.yaml
# Uploaded by Enzo8930302 via huggingface_hub (commit ddbd3c6, verified)
---
# Byte Dream Configuration
model:
  name: "Byte Dream"
  version: "1.0.0"

  # Model architecture parameters (optimized for <10GB)
  # NOTE(review): indentation was lost in the uploaded copy; unet/scheduler/
  # vae/text_encoder are restored as children of `model` per the comment
  # above — confirm against the loading code.
  unet:
    in_channels: 4
    out_channels: 4
    block_out_channels: [128, 256, 512, 512]
    layers_per_block: 1
    attention_head_dim: 4
    cross_attention_dim: 512  # Match CLIP ViT-B/32 output dimension
    use_linear_projection: false

  scheduler:
    name: "DDIM"  # Options: DDIM, PNDM, LMSDiscrete, EulerDiscrete
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: "scaled_linear"
    clip_sample: false
    set_alpha_to_one: false

  vae:
    in_channels: 3
    out_channels: 3
    down_block_types: ["DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"]
    up_block_types: ["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"]
    latent_channels: 4
    sample_size: 512
    # Reduced channels for smaller model
    block_out_channels: [64, 128, 256, 256]

  text_encoder:
    model: "openai/clip-vit-base-patch32"
    max_length: 77
# Generation parameters
generation:
  width: 512
  height: 512
  num_inference_steps: 50
  guidance_scale: 7.5
  negative_prompt: "ugly, blurry, low quality, distorted, deformed"
  seed: null  # null for random, or set integer
# CPU Optimization
cpu_optimization:
  use_openvino: false
  use_onnx: false
  precision: "fp32"  # fp32 or fp16
  threads: -1  # -1 for all available threads
  memory_limit: null  # null for auto, or MB value
# Memory optimization (12GB target)
memory_optimization:
  use_gradient_checkpointing: true
  mixed_precision: "fp16"  # Use fp16 for reduced memory
  attention_slicing: true  # Slice attention to reduce peak memory
# Training parameters
training:
  dataset_path: "./dataset"
  output_dir: "./models/bytedream"
  epochs: 100
  batch_size: 1  # Reduced from 4 for 12GB memory constraint
  gradient_accumulation_steps: 4  # Accumulate to maintain effective batch size
  learning_rate: 0.00001
  lr_scheduler: "constant_with_warmup"
  lr_warmup_steps: 500
  max_grad_norm: 1.0
  # "no" is quoted deliberately — unquoted `no` parses as boolean false
  mixed_precision: "no"  # no, fp16, bf16
  # Data augmentation
  random_flip: true
  random_crop: false
  center_crop: true
  # Logging
  logging_dir: "./logs"
  log_every_n_steps: 10
# Hugging Face
huggingface:
  organization: ""  # Your HF username/organization
  private: false
  push_to_hub: true