{
  "modality_dims": {
    "clip_l": 768,
    "clip_g": 1280,
    "t5_xl": 2048
  },
  "latent_dim": 2048,
  "seq_len": 77,
  "encoder_layers": 3,
  "decoder_layers": 3,
  "hidden_dim": 1024,
  "dropout": 0.1,
  "fusion_strategy": "cantor",
  "fusion_heads": 8,
  "fusion_dropout": 0.1,
  "beta_kl": 0.1,
  "beta_reconstruction": 1.0,
  "beta_cross_modal": 0.1,
  "recon_type": "mse",
  "modality_recon_weights": {
    "clip_l": 1.0,
    "clip_g": 1.0,
    "t5_xl": 0.3
  },
  "cross_modal_projection_dim": 768,
  "use_kl_annealing": true,
  "kl_anneal_epochs": 5,
  "kl_start_beta": 0.0,
  "batch_size": 64,
  "num_epochs": 50,
  "learning_rate": 0.0001,
  "weight_decay": 1e-05,
  "gradient_clip": 1.0,
  "use_scheduler": true,
  "scheduler_type": "cosine",
  "num_samples": 50000,
  "synthetic_ratio": 0.05,
  "checkpoint_dir": "./checkpoints_lyra_sdxl_t5xl",
  "save_every": 2500,
  "keep_last_n": 3,
  "hf_repo": "AbstractPhil/vae-lyra-sdxl-t5xl",
  "push_to_hub": true,
  "push_every": 5000,
  "auto_load_from_hub": true,
  "use_wandb": false,
  "wandb_project": "vae-lyra-sdxl-t5xl",
  "wandb_entity": null,
  "log_every": 50,
  "device": "cuda",
  "mixed_precision": true,
  "seed": 42,
  "num_workers": 0
}