{
  "architectures": [
    "SwinForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "depths": [
    2,
    2,
    18,
    2
  ],
  "drop_path_rate": 0.1,
  "embed_dim": 128,
  "encoder_stride": 32,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1024,
  "id2label": {
    "0": "Aeroplane",
    "1": "Bicycle",
    "2": "Bird",
    "3": "Boat",
    "4": "Bottle",
    "5": "Bus",
    "6": "Car",
    "7": "Cat",
    "8": "Chair",
    "9": "Cow",
    "10": "Diningtable",
    "11": "Dog",
    "12": "Horse",
    "13": "Motorbike",
    "14": "Person",
    "15": "Potted plant",
    "16": "Sheep",
    "17": "Sofa",
    "18": "Train",
    "19": "Tv/monitor"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "label2id": {
    "Aeroplane": 0,
    "Bicycle": 1,
    "Bird": 2,
    "Boat": 3,
    "Bottle": 4,
    "Bus": 5,
    "Car": 6,
    "Cat": 7,
    "Chair": 8,
    "Cow": 9,
    "Diningtable": 10,
    "Dog": 11,
    "Horse": 12,
    "Motorbike": 13,
    "Person": 14,
    "Potted plant": 15,
    "Sheep": 16,
    "Sofa": 17,
    "Train": 18,
    "Tv/monitor": 19
  },
  "layer_norm_eps": 1e-05,
  "mlp_ratio": 4.0,
  "model_type": "swin",
  "num_channels": 3,
  "num_heads": [
    4,
    8,
    16,
    32
  ],
  "num_layers": 4,
  "out_features": [
    "stage4"
  ],
  "out_indices": [
    4
  ],
  "patch_size": 4,
  "path_norm": true,
  "problem_type": "multi_label_classification",
  "qkv_bias": true,
  "stage_names": [
    "stem",
    "stage1",
    "stage2",
    "stage3",
    "stage4"
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.55.3",
  "use_absolute_embeddings": false,
  "window_size": 7
}