{
  "model": {
    "name": "AKASHA",
    "version": "1.0",
    "tokenizer": {
      "image_size": 256,
      "patch_size": 8,
      "num_tokens": 1024,
      "codebook_dim": 256,
      "encoder_hidden_dims": [64, 128, 256, 512],
      "decoder_hidden_dims": [512, 256, 128, 64],
      "commitment_cost": 0.25,
      "num_residual_blocks": 2
    },
    "transformer": {
      "num_layers": 24,
      "d_model": 1024,
      "num_heads": 16,
      "d_ff": 4096,
      "dropout_rate": 0.1,
      "max_sequence_length": 1024,
      "vocab_size": 1024,
      "use_rotary_embeddings": true
    },
    "generation": {
      "temperature": 0.9,
      "top_k": 100,
      "top_p": 0.95
    }
  },
  "training": {
    "batch_size": 32,
    "learning_rate": 3e-4,
    "warmup_steps": 4000,
    "total_steps": 500000,
    "weight_decay": 0.01,
    "gradient_clip_norm": 1.0,
    "mixed_precision": true,
    "stage1": {
      "epochs": 100,
      "learning_rate": 1e-4,
      "batch_size": 64
    },
    "stage2": {
      "epochs": 200,
      "learning_rate": 3e-4,
      "batch_size": 32
    }
  },
  "data": {
    "dataset": "imagenet",
    "image_size": 256,
    "augmentation": true
  },
  "huggingface": {
    "repo_id": "vedaco/AKASHA",
    "space_sdk": "gradio"
  }
}