---
# Configuration for the `cc12m_64x64` run: dataset reference, evaluation and
# sampling settings, diffusion/training hyperparameters, and the nested
# U-Net, diffusion and reader sub-configurations.
#
# NOTE(review): this file previously had every line wrapped in `| ... |`
# table markup and carried no indentation, so it could not be loaded as
# YAML. The nesting below was reconstructed from the empty-valued mapping
# headers (`unet_config:`, `resnet_config:`, `diffusion_config:`,
# `sampler_config:`, `reader_config:`) and from the fact that a flat layout
# would make keys such as `loss_target_type` collide with different values.
# Confirm the exact nesting against the schema of the consuming code.

name: cc12m_64x64
dataset_config: configs/datasets/cc12m.yaml

# Evaluation-related keys (batch count, image size, file list, reader config).
batch_size: 32
num_eval_batches: 500
sample_image_size: 64
test_file_list: validation.tsv
reader_config_file: launch_scripts/reader/latest_eval.yaml

# Sample-generation keys, plus device and model selection.
# The three commented keys below repeated top-level keys that are already
# defined above with the same values. Duplicate mapping keys are invalid
# YAML (most loaders silently keep the last one), so they are kept only as
# comments.
min_examples: 10000
sample_dir: /mnt/data/samples
# batch_size: 32
# sample_image_size: 64
# test_file_list: validation.tsv
device: cuda
model: unet

# Output location and top-level diffusion / training hyperparameters.
output_dir: /mnt/data/outputs
num_diffusion_steps: 1000
reproject_signal: false
predict_variances: false
model_output_scale: 0
prediction_type: V_PREDICTION
loss_target_type: HA_STYLE
schedule_type: DEEPFLOYD
prediction_length: 129
use_vdm_loss_weights: false
loss_factor: 1
num_training_steps: 5000
num_epochs: 20000
avg_lm_steps: 0
use_lm_mask: 1
categorical_conditioning: 0
vocab_file: t5.vocab
text_model: google/flan-t5-xl
vision_model: unet

# U-Net architecture. The three-element lists below all have the same
# length (presumably one entry per resolution level - TODO confirm).
unet_config:
  num_resnets_per_resolution: [2, 2, 2]
  attention_levels: [1, 2]
  num_attention_layers: [0, 1, 5]
  conditioning_feature_dim: -1
  conditioning_feature_proj_dim: 2048
  num_lm_head_layers: 0
  masked_cross_attention: 0
  resolution_channels: [256, 512, 768]
  skip_mid_blocks: false
  skip_cond_emb: false
  nesting: false
  # Quoted so the embedded colon is never read as a mapping separator.
  micro_conditioning: 'scale:64'
  temporal_mode: false
  temporal_spatial_ds: false
  temporal_positional_encoding: false
  # NOTE(review): assumed to be a child of unet_config - confirm.
  resnet_config:
    num_channels: -1
    output_channels: -1
    num_groups_norm: 32
    dropout: 0.0
    use_attention_ffn: true

# NOTE(review): assumed to be a top-level sibling of unet_config rather
# than one of its children - confirm.
diffusion_config:
  sampler_config:
    num_diffusion_steps: 1000
    reproject_signal: false
    schedule_type: DEEPFLOYD
    prediction_type: V_PREDICTION
    # Differs from the top-level loss_target_type (HA_STYLE) above.
    loss_target_type: DDPM
    beta_start: 0.0001
    beta_end: 0.02
    threshold_function: CLIP
    rescale_schedule: 1.0
    schedule_shifted: false
  # NOTE(review): these two keys are assumed to belong to diffusion_config
  # itself, not to sampler_config - confirm.
  model_output_scale: 0.0
  use_vdm_loss_weights: false

# Data reader settings (buffer sizes, image sizing, caption/token limits).
reader_config:
  reader_buffer_size: 500
  shuffle_buffer_size: 500
  image_size: 64
  smaller_side_size: 64
  random_crop: false
  max_caption_length: 512
  max_token_length: 128
  num_readers: 16

# Metrics, optimisation and logging/checkpoint cadence.
metrics: fid,clip
# batch_size: 32  (repeat of the top-level batch_size above; same value)
gradient_clip_norm: 2
num_gradient_accumulations: 1
warmup_steps: 10000
use_adamw: true
log_freq: 50
save_freq: 5000
lr: 5.0e-05
fp16: 0
use_precomputed_text_embeddings: 0
seed: -1