{
  "modality_dims": {
    "clip_l": 768,
    "clip_g": 1280,
    "t5_xl": 2048
  },
  "latent_dim": 2048,
  "seq_len": 77,
  "encoder_layers": 3,
  "decoder_layers": 3,
  "hidden_dim": 1024,
  "dropout": 0.1,
  "fusion_strategy": "cantor",
  "fusion_heads": 8,
  "fusion_dropout": 0.1,
  "beta_kl": 0.1,
  "beta_reconstruction": 1.0,
  "beta_cross_modal": 0.1,
  "recon_type": "mse",
  "modality_recon_weights": {
    "clip_l": 1.0,
    "clip_g": 1.0,
    "t5_xl": 0.3
  },
  "cross_modal_projection_dim": 768,
  "use_kl_annealing": true,
  "kl_anneal_epochs": 5,
  "kl_start_beta": 0.0,
  "batch_size": 64,
  "num_epochs": 50,
  "learning_rate": 0.0001,
  "weight_decay": 1e-05,
  "gradient_clip": 1.0,
  "use_scheduler": true,
  "scheduler_type": "cosine",
  "num_samples": 50000,
  "synthetic_ratio": 0.05,
  "checkpoint_dir": "./checkpoints_lyra_sdxl_t5xl",
  "save_every": 2500,
  "keep_last_n": 3,
  "hf_repo": "AbstractPhil/vae-lyra-sdxl-t5xl",
  "push_to_hub": true,
  "push_every": 5000,
  "auto_load_from_hub": true,
  "use_wandb": false,
  "wandb_project": "vae-lyra-sdxl-t5xl",
  "wandb_entity": null,
  "log_every": 50,
  "device": "cuda",
  "mixed_precision": true,
  "seed": 42,
  "num_workers": 0
}