Enzo8930302 committed on
Commit
ddbd3c6
·
verified ·
1 Parent(s): 0837576

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +17 -9
config.yaml CHANGED
@@ -4,15 +4,15 @@ model:
4
  name: "Byte Dream"
5
  version: "1.0.0"
6
 
7
- # Model architecture parameters
8
  unet:
9
  in_channels: 4
10
  out_channels: 4
11
- block_out_channels: [320, 640, 1280, 1280]
12
- layers_per_block: 2
13
- attention_head_dim: 8
14
- cross_attention_dim: 768
15
- use_linear_projection: true
16
 
17
  scheduler:
18
  name: "DDIM" # Options: DDIM, PNDM, LMSDiscrete, EulerDiscrete
@@ -30,9 +30,11 @@ model:
30
  up_block_types: ["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"]
31
  latent_channels: 4
32
  sample_size: 512
 
 
33
 
34
  text_encoder:
35
- model: "openai/clip-vit-large-patch14"
36
  max_length: 77
37
 
38
  # Generation parameters
@@ -52,13 +54,19 @@ cpu_optimization:
52
  threads: -1 # -1 for all available threads
53
  memory_limit: null # null for auto, or MB value
54
 
 
 
 
 
 
 
55
  # Training parameters
56
  training:
57
  dataset_path: "./dataset"
58
  output_dir: "./models/bytedream"
59
  epochs: 100
60
- batch_size: 4
61
- gradient_accumulation_steps: 1
62
  learning_rate: 0.00001
63
  lr_scheduler: "constant_with_warmup"
64
  lr_warmup_steps: 500
 
4
  name: "Byte Dream"
5
  version: "1.0.0"
6
 
7
+ # Model architecture parameters (optimized for <10GB)
8
  unet:
9
  in_channels: 4
10
  out_channels: 4
11
+ block_out_channels: [128, 256, 512, 512]
12
+ layers_per_block: 1
13
+ attention_head_dim: 4
14
+ cross_attention_dim: 512 # Match CLIP ViT-B/32 output dimension
15
+ use_linear_projection: false
16
 
17
  scheduler:
18
  name: "DDIM" # Options: DDIM, PNDM, LMSDiscrete, EulerDiscrete
 
30
  up_block_types: ["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"]
31
  latent_channels: 4
32
  sample_size: 512
33
+ # Reduced channels for smaller model
34
+ block_out_channels: [64, 128, 256, 256]
35
 
36
  text_encoder:
37
+ model: "openai/clip-vit-base-patch32"
38
  max_length: 77
39
 
40
  # Generation parameters
 
54
  threads: -1 # -1 for all available threads
55
  memory_limit: null # null for auto, or MB value
56
 
57
+ # Memory optimization (12GB target)
58
+ memory_optimization:
59
+ use_gradient_checkpointing: true
60
+ mixed_precision: "fp16" # Use fp16 for reduced memory
61
+ attention_slicing: true # Slice attention to reduce peak memory
62
+
63
  # Training parameters
64
  training:
65
  dataset_path: "./dataset"
66
  output_dir: "./models/bytedream"
67
  epochs: 100
68
+ batch_size: 1 # Reduced from 4 for 12GB memory constraint
69
+ gradient_accumulation_steps: 4 # Accumulate to maintain effective batch size
70
  learning_rate: 0.00001
71
  lr_scheduler: "constant_with_warmup"
72
  lr_warmup_steps: 500