{
  "model_name": "deit_base_distilled_patch16_224",
  "library": "timm",
  "input_size": 224,
  "num_classes": 2,
  "class_mapping": {
    "0": "real",
    "1": "fake"
  },
  "distilled": true,
  "logits_handling": {
    "mode": "first",
    "description": "During training, model returns tuple (logits, dist_logits). Use outputs[0] for predictions. During eval with model.eval(), single tensor is returned."
  },
  "normalization": {
    "scheme": "imagenet",
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225]
  },
  "head": {
    "architecture": [
      { "type": "LayerNorm", "features": 768 },
      { "type": "Linear", "in_features": 768, "out_features": 512 },
      { "type": "GELU" },
      { "type": "Dropout", "p": 0.2 },
      { "type": "Linear", "in_features": 512, "out_features": 2 }
    ],
    "note": "Custom MLP head replaces both model.head and model.head_dist (shared weights)"
  },
  "training": {
    "optimizer": "AdamW",
    "learning_rates": {
      "backbone": 2e-5,
      "head": 5e-5
    },
    "weight_decay": 1e-4,
    "gradient_clipping": {
      "max_norm": 1.0
    },
    "epochs": 2,
    "batch_size": 16,
    "criterion": "CrossEntropyLoss"
  }
}