Upload config.yaml with huggingface_hub
Browse files — config.yaml (+17 −9)
config.yaml
CHANGED
|
@@ -4,15 +4,15 @@ model:
|
|
| 4 |
name: "Byte Dream"
|
| 5 |
version: "1.0.0"
|
| 6 |
|
| 7 |
-
# Model architecture parameters
|
| 8 |
unet:
|
| 9 |
in_channels: 4
|
| 10 |
out_channels: 4
|
| 11 |
-
block_out_channels: [
|
| 12 |
-
layers_per_block:
|
| 13 |
-
attention_head_dim:
|
| 14 |
-
cross_attention_dim:
|
| 15 |
-
use_linear_projection:
|
| 16 |
|
| 17 |
scheduler:
|
| 18 |
name: "DDIM" # Options: DDIM, PNDM, LMSDiscrete, EulerDiscrete
|
|
@@ -30,9 +30,11 @@ model:
|
|
| 30 |
up_block_types: ["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"]
|
| 31 |
latent_channels: 4
|
| 32 |
sample_size: 512
|
|
|
|
|
|
|
| 33 |
|
| 34 |
text_encoder:
|
| 35 |
-
model: "openai/clip-vit-
|
| 36 |
max_length: 77
|
| 37 |
|
| 38 |
# Generation parameters
|
|
@@ -52,13 +54,19 @@ cpu_optimization:
|
|
| 52 |
threads: -1 # -1 for all available threads
|
| 53 |
memory_limit: null # null for auto, or MB value
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
# Training parameters
|
| 56 |
training:
|
| 57 |
dataset_path: "./dataset"
|
| 58 |
output_dir: "./models/bytedream"
|
| 59 |
epochs: 100
|
| 60 |
-
batch_size: 4
|
| 61 |
-
gradient_accumulation_steps:
|
| 62 |
learning_rate: 0.00001
|
| 63 |
lr_scheduler: "constant_with_warmup"
|
| 64 |
lr_warmup_steps: 500
|
|
|
|
| 4 |
name: "Byte Dream"
|
| 5 |
version: "1.0.0"
|
| 6 |
|
| 7 |
+
# Model architecture parameters (optimized for <10GB)
|
| 8 |
unet:
|
| 9 |
in_channels: 4
|
| 10 |
out_channels: 4
|
| 11 |
+
block_out_channels: [128, 256, 512, 512]
|
| 12 |
+
layers_per_block: 1
|
| 13 |
+
attention_head_dim: 4
|
| 14 |
+
cross_attention_dim: 512 # Match CLIP ViT-B/32 output dimension
|
| 15 |
+
use_linear_projection: false
|
| 16 |
|
| 17 |
scheduler:
|
| 18 |
name: "DDIM" # Options: DDIM, PNDM, LMSDiscrete, EulerDiscrete
|
|
|
|
| 30 |
up_block_types: ["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"]
|
| 31 |
latent_channels: 4
|
| 32 |
sample_size: 512
|
| 33 |
+
# Reduced channels for smaller model
|
| 34 |
+
block_out_channels: [64, 128, 256, 256]
|
| 35 |
|
| 36 |
text_encoder:
|
| 37 |
+
model: "openai/clip-vit-base-patch32"
|
| 38 |
max_length: 77
|
| 39 |
|
| 40 |
# Generation parameters
|
|
|
|
| 54 |
threads: -1 # -1 for all available threads
|
| 55 |
memory_limit: null # null for auto, or MB value
|
| 56 |
|
| 57 |
+
# Memory optimization (12GB target)
|
| 58 |
+
memory_optimization:
|
| 59 |
+
use_gradient_checkpointing: true
|
| 60 |
+
mixed_precision: "fp16" # Use fp16 for reduced memory
|
| 61 |
+
attention_slicing: true # Slice attention to reduce peak memory
|
| 62 |
+
|
| 63 |
# Training parameters
|
| 64 |
training:
|
| 65 |
dataset_path: "./dataset"
|
| 66 |
output_dir: "./models/bytedream"
|
| 67 |
epochs: 100
|
| 68 |
+
batch_size: 1 # Reduced from 4 for 12GB memory constraint
|
| 69 |
+
gradient_accumulation_steps: 4 # Accumulate to maintain effective batch size
|
| 70 |
learning_rate: 0.00001
|
| 71 |
lr_scheduler: "constant_with_warmup"
|
| 72 |
lr_warmup_steps: 500
|