Zero-Shot Image Classification
PyTorch
i3-clip
File size: 985 Bytes
254d5e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
{
    "model_type": "i3-clip",
    "architecture": {
        "d_model": 768,
        "vision_encoder": {
            "type": "resnet-bottleneck",
            "layers": [3, 4, 6, 3],
            "input_resolution": 224,
            "output_dim": 2048,
            "stem_channels": 64
        },
        "text_encoder": {
            "type": "hybrid-rwkv-transformer",
            "vocab_size": 49408,
            "max_position_embeddings": 77,
            "n_rwkv_layers": 12,
            "n_attn_layers": 4,
            "n_heads": 12,
            "ffn_multiplier": 4
        }
    },
    "training_params": {
        "batch_size": 32,
        "learning_rate": 5e-05,
        "optimizer": "AdamW",
        "logit_scale_init": 2.659,
        "max_steps": 2000
    },
    "dataset": {
        "name": "midjourney-detailed-prompts",
        "image_size": [224, 224],
        "normalization": {
            "mean": [0.48, 0.45, 0.40],
            "std": [0.26, 0.26, 0.27]
        }
    }
}