---
# Flat record of training-run settings: one key per line, in the same order
# as the original dump.
#
# Normalisation applied so the file loads with the intended types under both
# YAML 1.1 and YAML 1.2 loaders:
#   - Python-style `None` is written as `null`.
#   - Python-style `True` / `False` are written as `true` / `false`.
#   - Keys that had nothing after the colon are written as `""`. Absent
#     values are spelled out as `None` elsewhere in the dump, so a blank is
#     taken to be a string rather than null.
#     NOTE(review): a whitespace-only value (e.g. a tab) would have been lost
#     when the file was collapsed onto long lines - confirm per key below.
accum_freq: 1
audio_ext: flac
audio_fill: repeatpad
audio_fusion: false
audio_int16_normalize: false
audio_trunc: rand_trunc
audio_zeroshot_audio_key: audio
audio_zeroshot_class_key: category
audio_zeroshot_dataset: null
audio_zeroshot_multiprocessing_context: forkserver
audio_zeroshot_split: train
audio_zeroshot_target_key: target
audio_zeroshot_templates: null
audio_zeroshot_workers: 0
aug_cfg: {}
batch_size: 4096
beta1: 0.9
beta2: 0.98
cache_dir: null
checkpoint_path: ./logs/ViT-B-32-Vanilla-resume/checkpoints
coca_caption_loss_weight: 2.0
coca_contrastive_loss_weight: 1.0
copy_codebase: false
csv_caption_key: title
csv_img_key: filepath
# NOTE(review): blank in the source; may originally have been a
# whitespace-only separator such as a tab - confirm.
csv_separator: ""
dataset_resampled: false
dataset_type: webdataset
ddp_static_graph: false
debug: false
delete_previous_checkpoint: false
# Quoted so the embedded colon can never be read as a mapping indicator.
device: 'cuda:0'
dist_backend: null
dist_url: null
distill: false
distill_model: null
distill_pretrained: null
distributed: true
epochs: 32
epochs_cooldown: null
# Same value as 1e-06; written with an explicit decimal point so loaders that
# require one in a float (YAML 1.1 style) do not read it as a string.
eps: 1.0e-06
force_context_length: null
force_custom_text: false
force_image_size: null
force_naflex_vision: false
force_patch_dropout: null
force_quick_gelu: false
fsdp: false
fsdp_checkpoint: full
fsdp_no_reshard_after_forward: false
fsdp_offload_cpu: false
gather_with_grad: true
grad_checkpointing: false
grad_clip_norm: null
image_interpolation: null
image_mean: null
image_resize_mode: null
image_std: null
imagenet_v2: null
imagenet_val: null
local_loss: true
local_rank: 0
lock_image: false
lock_image_freeze_bn_stats: false
lock_image_unlocked_groups: 0
lock_text: false
lock_text_freeze_layer_norm: false
lock_text_unlocked_layers: 0
log_every_n_steps: 100
log_level: 20
log_local: false
log_path: ./logs/ViT-B-32-Vanilla-resume/out.log
logs: ./logs/
loss_dist_impl: null
lr: 0.0005
lr_cooldown_end: 0.0
lr_cooldown_power: 1.0
lr_scheduler: cosine
model: ViT-B-32
momentum: null
naflex_batch_divisor: 8
# The source spells this lowercase `none`, unlike the `None` used for absent
# values, so it is kept as a literal string and quoted to make that explicit.
naflex_loss_scale: 'none'
naflex_max_image_tokens_per_batch: 16384
naflex_num_train_image_tokens: null
naflex_patch_size_probs: null
naflex_patch_sizes: null
naflex_seq_lens: null
name: ViT-B-32-Vanilla-resume
no_set_device_rank: false
opt: adamw
precision: amp_bfloat16
pretrained: ""
pretrained_audio: null
pretrained_image: false
rank: 0
remote_sync: null
remote_sync_frequency: 300
remote_sync_protocol: s3
report_to: wandb
resume: /scratch/work/zhul2/code/open_clip/logs/ViT-B-32-Vanilla/checkpoints/epoch_24.pt
save_frequency: 1
save_most_recent: false
seed: 0
siglip: false
skip_scheduler: false
tensorboard: false
tensorboard_path: ""
torchcompile: true
torchcompile_backend: null
torchcompile_mode: null
torchcompile_strategy: task
# Quoted because the value contains `{` / `}`, which are YAML flow indicators.
train_data: '/scratch/shareddata/dldata/laion400M/img2dataset/laion400m-data/{00000..41407}.tar'
train_data_upsampling_factors: null
train_num_samples: 268836185
use_bn_sync: false
use_bnb_linear: null
use_naflex: false
val_data: null
val_frequency: 1
val_num_samples: null
wandb: true
wandb_notes: ""
wandb_project_name: open-clip
warmup: 2000
wd: 0.2
workers: 12
world_size: 8
zeroshot_frequency: 2