{ "architectures": [ "VJEPA2ForVideoClassification" ], "attention_dropout": 0.0, "attention_probs_dropout_prob": 0.0, "crop_size": 384, "drop_path_rate": 0.0, "frames_per_clip": 64, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 1408, "id2label": { "0": "Approaching [something] with your camera", "1": "Attaching [something] to [something]", "2": "Bending [something] so that it deforms", "3": "Bending [something] until it breaks", "4": "Burying [something] in [something]", "5": "Closing [something]", "6": "Covering [something] with [something]", "7": "Digging [something] out of [something]", "8": "Dropping [something] behind [something]", "9": "Dropping [something] in front of [something]", "10": "Dropping [something] into [something]", "11": "Dropping [something] next to [something]", "12": "Dropping [something] onto [something]", "13": "Failing to put [something] into [something] because [something] does not fit", "14": "Folding [something]", "15": "Hitting [something] with [something]", "16": "Holding [something]", "17": "Holding [something] behind [something]", "18": "Holding [something] in front of [something]", "19": "Holding [something] next to [something]", "20": "Holding [something] over [something]", "21": "Laying [something] on the table on its side, not upright", "22": "Letting [something] roll along a flat surface", "23": "Letting [something] roll down a slanted surface", "24": "Letting [something] roll up a slanted surface, so it rolls back down", "25": "Lifting a surface with [something] on it but not enough for it to slide down", "26": "Lifting a surface with [something] on it until it starts sliding down", "27": "Lifting [something] up completely without letting it drop down", "28": "Lifting [something] up completely, then letting it drop down", "29": "Lifting [something] with [something] on it", "30": "Lifting up one end of [something] without letting it drop down",
"31": "Lifting up one end of [something], then letting it drop down", "32": "Moving away from [something] with your camera", "33": "Moving [part] of [something]", "34": "Moving [something] across a surface until it falls down", "35": "Moving [something] across a surface without it falling down", "36": "Moving [something] and [something] away from each other", "37": "Moving [something] and [something] closer to each other", "38": "Moving [something] and [something] so they collide with each other", "39": "Moving [something] and [something] so they pass each other", "40": "Moving [something] away from [something]", "41": "Moving [something] away from the camera", "42": "Moving [something] closer to [something]", "43": "Moving [something] down", "44": "Moving [something] towards the camera", "45": "Moving [something] up", "46": "Opening [something]", "47": "Picking [something] up", "48": "Piling [something] up", "49": "Plugging [something] into [something]", "50": "Plugging [something] into [something] but pulling it right out as you remove your hand", "51": "Poking a hole into [some substance]", "52": "Poking a hole into [something soft]", "53": "Poking a stack of [something] so the stack collapses", "54": "Poking a stack of [something] without the stack collapsing", "55": "Poking [something] so it slightly moves", "56": "Poking [something] so lightly that it doesn't or almost doesn't move", "57": "Poking [something] so that it falls over", "58": "Poking [something] so that it spins around", "59": "Pouring [something] into [something]", "60": "Pouring [something] into [something] until it overflows", "61": "Pouring [something] onto [something]", "62": "Pouring [something] out of [something]", "63": "Pretending or failing to wipe [something] off of [something]", "64": "Pretending or trying and failing to twist [something]", "65": "Pretending to be tearing [something that is not tearable]", "66": "Pretending to close [something] without actually closing it", "67": "Pretending to open [something] without actually opening it", "68":
"Pretending to pick [something] up", "69": "Pretending to poke [something]", "70": "Pretending to pour [something] out of [something], but [something] is empty", "71": "Pretending to put [something] behind [something]", "72": "Pretending to put [something] into [something]", "73": "Pretending to put [something] next to [something]", "74": "Pretending to put [something] on a surface", "75": "Pretending to put [something] onto [something]", "76": "Pretending to put [something] underneath [something]", "77": "Pretending to scoop [something] up with [something]", "78": "Pretending to spread air onto [something]", "79": "Pretending to sprinkle air onto [something]", "80": "Pretending to squeeze [something]", "81": "Pretending to take [something] from [somewhere]", "82": "Pretending to take [something] out of [something]", "83": "Pretending to throw [something]", "84": "Pretending to turn [something] upside down", "85": "Pulling [something] from behind of [something]", "86": "Pulling [something] from left to right", "87": "Pulling [something] from right to left", "88": "Pulling [something] onto [something]", "89": "Pulling [something] out of [something]", "90": "Pulling two ends of [something] but nothing happens", "91": "Pulling two ends of [something] so that it gets stretched", "92": "Pulling two ends of [something] so that it separates into two pieces", "93": "Pushing [something] from left to right", "94": "Pushing [something] from right to left", "95": "Pushing [something] off of [something]", "96": "Pushing [something] onto [something]", "97": "Pushing [something] so it spins", "98": "Pushing [something] so that it almost falls off but doesn't", "99": "Pushing [something] so that it falls off the table", "100": "Pushing [something] so that it slightly moves", "101": "Pushing [something] with [something]", "102": "Putting [number of] [something] onto [something]", "103": "Putting [something] and [something] on the table",
"104": "Putting [something] behind [something]", "105": "Putting [something] in front of [something]", "106": "Putting [something] into [something]", "107": "Putting [something] next to [something]", "108": "Putting [something] on a flat surface without letting it roll", "109": "Putting [something] on a surface", "110": "Putting [something] on the edge of [something] so it is not supported and falls down", "111": "Putting [something] onto a slanted surface but it doesn't glide down", "112": "Putting [something] onto [something]", "113": "Putting [something] onto [something else that cannot support it] so it falls down", "114": "Putting [something similar to other things that are already on the table]", "115": "Putting [something] that can't roll onto a slanted surface, so it slides down", "116": "Putting [something] that can't roll onto a slanted surface, so it stays where it is", "117": "Putting [something that cannot actually stand upright] upright on the table, so it falls on its side", "118": "Putting [something] underneath [something]", "119": "Putting [something] upright on the table", "120": "Putting [something], [something] and [something] on the table", "121": "Removing [something], revealing [something] behind", "122": "Rolling [something] on a flat surface", "123": "Scooping [something] up with [something]", "124": "Showing a photo of [something] to the camera", "125": "Showing [something] behind [something]", "126": "Showing [something] next to [something]", "127": "Showing [something] on top of [something]", "128": "Showing [something] to the camera", "129": "Showing that [something] is empty", "130": "Showing that [something] is inside [something]", "131": "[Something] being deflected from [something]", "132": "[Something] colliding with [something] and both are being deflected", "133": "[Something] colliding with [something] and both come to a halt", "134": "[Something] falling like a feather or paper", "135": "[Something] falling like a rock",
"136": "Spilling [something] behind [something]", "137": "Spilling [something] next to [something]", "138": "Spilling [something] onto [something]", "139": "Spinning [something] so it continues spinning", "140": "Spinning [something] that quickly stops spinning", "141": "Spreading [something] onto [something]", "142": "Sprinkling [something] onto [something]", "143": "Squeezing [something]", "144": "Stacking [number of] [something]", "145": "Stuffing [something] into [something]", "146": "Taking [one of many similar things on the table]", "147": "Taking [something] from [somewhere]", "148": "Taking [something] out of [something]", "149": "Tearing [something] into two pieces", "150": "Tearing [something] just a little bit", "151": "Throwing [something]", "152": "Throwing [something] against [something]", "153": "Throwing [something] in the air and catching it", "154": "Throwing [something] in the air and letting it fall", "155": "Throwing [something] onto a surface", "156": "Tilting [something] with [something] on it slightly so it doesn't fall down", "157": "Tilting [something] with [something] on it until it falls off", "158": "Tipping [something] over", "159": "Tipping [something] with [something in it] over, so [something in it] falls out", "160": "Touching (without moving) [part] of [something]", "161": "Trying but failing to attach [something] to [something] because it doesn't stick", "162": "Trying to bend [something unbendable] so nothing happens", "163": "Trying to pour [something] into [something], but missing so it spills next to it", "164": "Turning [something] upside down", "165": "Turning the camera downwards while filming [something]", "166": "Turning the camera left while filming [something]", "167": "Turning the camera right while filming [something]", "168": "Turning the camera upwards while filming [something]", "169": "Twisting (wringing) [something] wet until water comes out", "170": "Twisting [something]", "171": "Uncovering [something]", "172": "Unfolding [something]", "173":
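"Wiping [something] off of [something]" }, "image_size": 384, "in_chans": 3, "initializer_range": 0.02, "label2id": { "Approaching [something] with your camera": 0, "Attaching [something] to [something]": 1, "Bending [something] so that it deforms": 2, "Bending [something] until it breaks": 3, "Burying [something] in [something]": 4, "Closing [something]": 5, "Covering [something] with [something]": 6, "Digging [something] out of [something]": 7, "Dropping [something] behind [something]": 8, "Dropping [something] in front of [something]": 9, "Dropping [something] into [something]": 10, "Dropping [something] next to [something]": 11, "Dropping [something] onto [something]": 12, "Failing to put [something] into [something] because [something] does not fit": 13, "Folding [something]": 14, "Hitting [something] with [something]": 15, "Holding [something]": 16, "Holding [something] behind [something]": 17, "Holding [something] in front of [something]": 18, "Holding [something] next to [something]": 19, "Holding [something] over [something]": 20, "Laying [something] on the table on its side, not upright": 21, "Letting [something] roll along a flat surface": 22, "Letting [something] roll down a slanted surface": 23, "Letting [something] roll up a slanted surface, so it rolls back down": 24, "Lifting a surface with [something] on it but not enough for it to slide down": 25, "Lifting a surface with [something] on it until it starts sliding down": 26, "Lifting [something] up completely without letting it drop down": 27, "Lifting [something] up completely, then letting it drop down": 28, "Lifting [something] with [something] on it": 29, "Lifting up one end of [something] without letting it drop down": 30, "Lifting up one end of [something], then letting it drop down": 31, "Moving away from [something] with your camera": 32, "Moving [part] of [something]": 33, "Moving [something] across a surface until it falls down": 34, "Moving [something] across a surface without it falling down": 35, "Moving [something] and [something] away from each other": 36, "Moving [something] and [something] closer to each other": 37, "Moving [something] and [something] so they collide with each other": 38, "Moving [something] and [something] so they pass each other": 39, "Moving [something] away from [something]": 40, "Moving [something] away from the camera": 41, "Moving [something] closer to [something]": 42, "Moving [something] down": 43,
"Moving [something] towards the camera": 44, "Moving [something] up": 45, "Opening [something]": 46, "Picking [something] up": 47, "Piling [something] up": 48, "Plugging [something] into [something]": 49, "Plugging [something] into [something] but pulling it right out as you remove your hand": 50, "Poking a hole into [some substance]": 51, "Poking a hole into [something soft]": 52, "Poking a stack of [something] so the stack collapses": 53, "Poking a stack of [something] without the stack collapsing": 54, "Poking [something] so it slightly moves": 55, "Poking [something] so lightly that it doesn't or almost doesn't move": 56, "Poking [something] so that it falls over": 57, "Poking [something] so that it spins around": 58, "Pouring [something] into [something]": 59, "Pouring [something] into [something] until it overflows": 60, "Pouring [something] onto [something]": 61, "Pouring [something] out of [something]": 62, "Pretending or failing to wipe [something] off of [something]": 63, "Pretending or trying and failing to twist [something]": 64, "Pretending to be tearing [something that is not tearable]": 65, "Pretending to close [something] without actually closing it": 66, "Pretending to open [something] without actually opening it": 67, "Pretending to pick [something] up": 68, "Pretending to poke [something]": 69, "Pretending to pour [something] out of [something], but [something] is empty": 70, "Pretending to put [something] behind [something]": 71, "Pretending to put [something] into [something]": 72, "Pretending to put [something] next to [something]": 73, "Pretending to put [something] on a surface": 74, "Pretending to put [something] onto [something]": 75, "Pretending to put [something] underneath [something]": 76, "Pretending to scoop [something] up with [something]": 77, "Pretending to spread air onto [something]": 78, "Pretending to sprinkle air onto [something]": 79, "Pretending to squeeze [something]": 80, "Pretending to take [something] from [somewhere]": 81, "Pretending to take [something] out of [something]": 82, "Pretending to throw [something]": 83, "Pretending to turn [something] upside down": 84, "Pulling [something] from behind of [something]": 85, "Pulling [something] from left to right": 86, "Pulling [something] from right to left": 87,
"Pulling [something] onto [something]": 88, "Pulling [something] out of [something]": 89, "Pulling two ends of [something] but nothing happens": 90, "Pulling two ends of [something] so that it gets stretched": 91, "Pulling two ends of [something] so that it separates into two pieces": 92, "Pushing [something] from left to right": 93, "Pushing [something] from right to left": 94, "Pushing [something] off of [something]": 95, "Pushing [something] onto [something]": 96, "Pushing [something] so it spins": 97, "Pushing [something] so that it almost falls off but doesn't": 98, "Pushing [something] so that it falls off the table": 99, "Pushing [something] so that it slightly moves": 100, "Pushing [something] with [something]": 101, "Putting [number of] [something] onto [something]": 102, "Putting [something] and [something] on the table": 103, "Putting [something] behind [something]": 104, "Putting [something] in front of [something]": 105, "Putting [something] into [something]": 106, "Putting [something] next to [something]": 107, "Putting [something] on a flat surface without letting it roll": 108, "Putting [something] on a surface": 109, "Putting [something] on the edge of [something] so it is not supported and falls down": 110, "Putting [something] onto a slanted surface but it doesn't glide down": 111, "Putting [something] onto [something]": 112, "Putting [something] onto [something else that cannot support it] so it falls down": 113, "Putting [something similar to other things that are already on the table]": 114, "Putting [something] that can't roll onto a slanted surface, so it slides down": 115, "Putting [something] that can't roll onto a slanted surface, so it stays where it is": 116, "Putting [something that cannot actually stand upright] upright on the table, so it falls on its side": 117, "Putting [something] underneath [something]": 118, "Putting [something] upright on the table": 119, "Putting [something], [something] and [something] on the table": 120, "Removing [something], revealing [something] behind": 121, "Rolling [something] on a flat surface": 122, "Scooping [something] up with [something]": 123, "Showing a photo of [something] to the camera": 124, "Showing [something] behind [something]": 125, "Showing [something] next to [something]": 126, "Showing [something] on top of [something]": 127, "Showing [something] to the camera": 128, "Showing that [something] is empty": 129, "Showing that [something] is inside [something]": 130,
"[Something] being deflected from [something]": 131, "[Something] colliding with [something] and both are being deflected": 132, "[Something] colliding with [something] and both come to a halt": 133, "[Something] falling like a feather or paper": 134, "[Something] falling like a rock": 135, "Spilling [something] behind [something]": 136, "Spilling [something] next to [something]": 137, "Spilling [something] onto [something]": 138, "Spinning [something] so it continues spinning": 139, "Spinning [something] that quickly stops spinning": 140, "Spreading [something] onto [something]": 141, "Sprinkling [something] onto [something]": 142, "Squeezing [something]": 143, "Stacking [number of] [something]": 144, "Stuffing [something] into [something]": 145, "Taking [one of many similar things on the table]": 146, "Taking [something] from [somewhere]": 147, "Taking [something] out of [something]": 148, "Tearing [something] into two pieces": 149, "Tearing [something] just a little bit": 150, "Throwing [something]": 151, "Throwing [something] against [something]": 152, "Throwing [something] in the air and catching it": 153, "Throwing [something] in the air and letting it fall": 154, "Throwing [something] onto a surface": 155, "Tilting [something] with [something] on it slightly so it doesn't fall down": 156, "Tilting [something] with [something] on it until it falls off": 157, "Tipping [something] over": 158, "Tipping [something] with [something in it] over, so [something in it] falls out": 159, "Touching (without moving) [part] of [something]": 160, "Trying but failing to attach [something] to [something] because it doesn't stick": 161, "Trying to bend [something unbendable] so nothing happens": 162, "Trying to pour [something] into [something], but missing so it spills next to it": 163, "Turning [something] upside down": 164, "Turning the camera downwards while filming [something]": 165, "Turning the camera left while filming [something]": 166, "Turning the camera right while filming [something]": 167, "Turning the camera upwards while filming [something]": 168, "Twisting (wringing) [something] wet until water comes out": 169, "Twisting [something]": 170, "Uncovering [something]": 171, "Unfolding [something]": 172, "Wiping [something] off of [something]": 173 },
"layer_norm_eps": 1e-06, "mlp_ratio": 4.363636363636363, "model_type": "vjepa2", "num_attention_heads": 22, "num_hidden_layers": 40, "num_pooler_layers": 3, "patch_size": 16, "pred_hidden_size": 384, "pred_mlp_ratio": 4.0, "pred_num_attention_heads": 12, "pred_num_hidden_layers": 12, "pred_num_mask_tokens": 10, "pred_zero_init_mask_tokens": true, "qkv_bias": true, "torch_dtype": "float32", "transformers_version": "4.53.0.dev0", "tubelet_size": 2, "wide_SiLU": true }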