# Source: Hugging Face repository Enzo8930302/ByteDream, file config.yaml
# Commit ddbd3c6 (verified, about 1 month ago):
#   "Upload config.yaml with huggingface_hub"
# File size: 2.52 kB

	# Byte Dream Configuration

	model:
	name: "Byte Dream"
	version: "1.0.0"

	# Model architecture parameters (optimized for <10GB)
	unet:
	in_channels: 4
	out_channels: 4
	block_out_channels: [128, 256, 512, 512]
	layers_per_block: 1
	attention_head_dim: 4
	cross_attention_dim: 512 # Match CLIP ViT-B/32 output dimension
	use_linear_projection: false

	scheduler:
	name: "DDIM" # Options: DDIM, PNDM, LMSDiscrete, EulerDiscrete
	num_train_timesteps: 1000
	beta_start: 0.00085
	beta_end: 0.012
	beta_schedule: "scaled_linear"
	clip_sample: false
	set_alpha_to_one: false

	vae:
	in_channels: 3
	out_channels: 3
	down_block_types: ["DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"]
	up_block_types: ["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"]
	latent_channels: 4
	sample_size: 512
	# Reduced channels for smaller model
	block_out_channels: [64, 128, 256, 256]

	text_encoder:
	model: "openai/clip-vit-base-patch32"
	max_length: 77

	# Generation parameters
	generation:
	width: 512
	height: 512
	num_inference_steps: 50
	guidance_scale: 7.5
	negative_prompt: "ugly, blurry, low quality, distorted, deformed"
	seed: null # null for random, or set integer

	# CPU Optimization
	cpu_optimization:
	use_openvino: false
	use_onnx: false
	precision: "fp32" # fp32 or fp16
	threads: -1 # -1 for all available threads
	memory_limit: null # null for auto, or MB value

	# Memory optimization (12GB target)
	memory_optimization:
	use_gradient_checkpointing: true
	mixed_precision: "fp16" # Use fp16 for reduced memory
	attention_slicing: true # Slice attention to reduce peak memory

	# Training parameters
	training:
	dataset_path: "./dataset"
	output_dir: "./models/bytedream"
	epochs: 100
	batch_size: 1 # Reduced from 4 for 12GB memory constraint
	gradient_accumulation_steps: 4 # Accumulate to maintain effective batch size
	learning_rate: 0.00001
	lr_scheduler: "constant_with_warmup"
	lr_warmup_steps: 500
	max_grad_norm: 1.0
	mixed_precision: "no" # no, fp16, bf16

	# Data augmentation
	random_flip: true
	random_crop: false
	center_crop: true

	# Logging
	logging_dir: "./logs"
	log_every_n_steps: 10

	# Hugging Face
	huggingface:
	organization: "" # Your HF username/organization
	private: false
	push_to_hub: true