{
  "model_name": "deit_base_distilled_patch16_224",
  "library": "timm",
  "input_size": 224,
  "num_classes": 2,
  "class_mapping": {
    "0": "real",
    "1": "fake"
  },
  "distilled": true,
  "logits_handling": {
    "mode": "first",
    "description": "During training, model returns tuple (logits, dist_logits). Use outputs[0] for predictions. During eval with model.eval(), single tensor is returned."
  },
  "normalization": {
    "scheme": "imagenet",
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225]
  },
  "head": {
    "architecture": [
      {"type": "LayerNorm", "features": 768},
      {"type": "Linear", "in_features": 768, "out_features": 512},
      {"type": "GELU"},
      {"type": "Dropout", "p": 0.2},
      {"type": "Linear", "in_features": 512, "out_features": 2}
    ],
    "note": "Custom MLP head replaces both model.head and model.head_dist (shared weights)"
  },
  "training": {
    "optimizer": "AdamW",
    "learning_rates": {
      "backbone": 2e-5,
      "head": 5e-5
    },
    "weight_decay": 1e-4,
    "gradient_clipping": {
      "max_norm": 1.0
    },
    "epochs": 2,
    "batch_size": 16,
    "criterion": "CrossEntropyLoss"
  }
}