{
  "backbone": "facebook/EUPE-ViT-B",
  "feature_dim": 768,
  "input_resolution": 768,
  "patch_size": 16,
  "patch_grid": [48, 48],
  "preprocessing": "layernorm over 768 channels then max-pool over 2304 patches",
  "pos_dims": [48, 525, 475, 645, 273, 292, 158, 510, 506, 337, 8, 309, 267, 217, 79, 13, 657, 207, 722, 311],
  "neg_dims": [642, 224, 113, 565, 49, 637, 45, 520, 219, 290, 529, 617, 269, 745, 576, 701, 105, 694, 82, 283],
  "pos_weight": 1.0,
  "neg_weight": -1.0,
  "threshold": 25.284494400024414,
  "decision": "sum(feat[pos_dims]) - sum(feat[neg_dims]) > threshold",
  "free_parameters": 1,
  "fixed_parameters": {
    "dim_indices": 40,
    "signs": 40
  }
}