{
  "model_type": "i3-clip",
  "architecture": {
    "d_model": 768,
    "vision_encoder": {
      "type": "resnet-bottleneck",
      "layers": [3, 4, 6, 3],
      "input_resolution": 224,
      "output_dim": 2048,
      "stem_channels": 64
    },
    "text_encoder": {
      "type": "hybrid-rwkv-transformer",
      "vocab_size": 49408,
      "max_position_embeddings": 77,
      "n_rwkv_layers": 12,
      "n_attn_layers": 4,
      "n_heads": 12,
      "ffn_multiplier": 4
    }
  },
  "training_params": {
    "batch_size": 32,
    "learning_rate": 5e-05,
    "optimizer": "AdamW",
    "logit_scale_init": 2.659,
    "max_steps": 2000
  },
  "dataset": {
    "name": "midjourney-detailed-prompts",
    "image_size": [224, 224],
    "normalization": {
      "mean": [0.48, 0.45, 0.40],
      "std": [0.26, 0.26, 0.27]
    }
  }
}