{
  "model": {
    "name": "AKASHA",
    "version": "1.0",
    "tokenizer": {
      "image_size": 256,
      "patch_size": 8,
      "num_tokens": 1024,
      "codebook_dim": 256,
      "encoder_hidden_dims": [64, 128, 256, 512],
      "decoder_hidden_dims": [512, 256, 128, 64],
      "commitment_cost": 0.25,
      "num_residual_blocks": 2
    },
    "transformer": {
      "num_layers": 24,
      "d_model": 1024,
      "num_heads": 16,
      "d_ff": 4096,
      "dropout_rate": 0.1,
      "max_sequence_length": 1024,
      "vocab_size": 1024,
      "use_rotary_embeddings": true
    },
    "generation": {
      "temperature": 0.9,
      "top_k": 100,
      "top_p": 0.95
    }
  },
  "training": {
    "batch_size": 32,
    "learning_rate": 3e-4,
    "warmup_steps": 4000,
    "total_steps": 500000,
    "weight_decay": 0.01,
    "gradient_clip_norm": 1.0,
    "mixed_precision": true,
    "stage1": {
      "epochs": 100,
      "learning_rate": 1e-4,
      "batch_size": 64
    },
    "stage2": {
      "epochs": 200,
      "learning_rate": 3e-4,
      "batch_size": 32
    }
  },
  "data": {
    "dataset": "imagenet",
    "image_size": 256,
    "augmentation": true
  },
  "huggingface": {
    "repo_id": "vedaco/AKASHA",
    "space_sdk": "gradio"
  }
}