| { | |
| "architectures": [ | |
| "VJEPA2ForVideoClassification" | |
| ], | |
| "attention_dropout": 0.0, | |
| "attention_probs_dropout_prob": 0.0, | |
| "crop_size": 384, | |
| "drop_path_rate": 0.0, | |
| "frames_per_clip": 64, | |
| "hidden_act": "gelu", | |
| "hidden_dropout_prob": 0.0, | |
| "hidden_size": 1408, | |
| "id2label": { | |
| "0": "Approaching [something] with your camera", | |
| "1": "Attaching [something] to [something]", | |
| "2": "Bending [something] so that it deforms", | |
| "3": "Bending [something] until it breaks", | |
| "4": "Burying [something] in [something]", | |
| "5": "Closing [something]", | |
| "6": "Covering [something] with [something]", | |
| "7": "Digging [something] out of [something]", | |
| "8": "Dropping [something] behind [something]", | |
| "9": "Dropping [something] in front of [something]", | |
| "10": "Dropping [something] into [something]", | |
| "11": "Dropping [something] next to [something]", | |
| "12": "Dropping [something] onto [something]", | |
| "13": "Failing to put [something] into [something] because [something] does not fit", | |
| "14": "Folding [something]", | |
| "15": "Hitting [something] with [something]", | |
| "16": "Holding [something]", | |
| "17": "Holding [something] behind [something]", | |
| "18": "Holding [something] in front of [something]", | |
| "19": "Holding [something] next to [something]", | |
| "20": "Holding [something] over [something]", | |
| "21": "Laying [something] on the table on its side, not upright", | |
| "22": "Letting [something] roll along a flat surface", | |
| "23": "Letting [something] roll down a slanted surface", | |
| "24": "Letting [something] roll up a slanted surface, so it rolls back down", | |
| "25": "Lifting a surface with [something] on it but not enough for it to slide down", | |
| "26": "Lifting a surface with [something] on it until it starts sliding down", | |
| "27": "Lifting [something] up completely without letting it drop down", | |
| "28": "Lifting [something] up completely, then letting it drop down", | |
| "29": "Lifting [something] with [something] on it", | |
| "30": "Lifting up one end of [something] without letting it drop down", | |
| "31": "Lifting up one end of [something], then letting it drop down", | |
| "32": "Moving away from [something] with your camera", | |
| "33": "Moving [part] of [something]", | |
| "34": "Moving [something] across a surface until it falls down", | |
| "35": "Moving [something] across a surface without it falling down", | |
| "36": "Moving [something] and [something] away from each other", | |
| "37": "Moving [something] and [something] closer to each other", | |
| "38": "Moving [something] and [something] so they collide with each other", | |
| "39": "Moving [something] and [something] so they pass each other", | |
| "40": "Moving [something] away from [something]", | |
| "41": "Moving [something] away from the camera", | |
| "42": "Moving [something] closer to [something]", | |
| "43": "Moving [something] down", | |
| "44": "Moving [something] towards the camera", | |
| "45": "Moving [something] up", | |
| "46": "Opening [something]", | |
| "47": "Picking [something] up", | |
| "48": "Piling [something] up", | |
| "49": "Plugging [something] into [something]", | |
| "50": "Plugging [something] into [something] but pulling it right out as you remove your hand", | |
| "51": "Poking a hole into [some substance]", | |
| "52": "Poking a hole into [something soft]", | |
| "53": "Poking a stack of [something] so the stack collapses", | |
| "54": "Poking a stack of [something] without the stack collapsing", | |
| "55": "Poking [something] so it slightly moves", | |
| "56": "Poking [something] so lightly that it doesn't or almost doesn't move", | |
| "57": "Poking [something] so that it falls over", | |
| "58": "Poking [something] so that it spins around", | |
| "59": "Pouring [something] into [something]", | |
| "60": "Pouring [something] into [something] until it overflows", | |
| "61": "Pouring [something] onto [something]", | |
| "62": "Pouring [something] out of [something]", | |
| "63": "Pretending or failing to wipe [something] off of [something]", | |
| "64": "Pretending or trying and failing to twist [something]", | |
| "65": "Pretending to be tearing [something that is not tearable]", | |
| "66": "Pretending to close [something] without actually closing it", | |
| "67": "Pretending to open [something] without actually opening it", | |
| "68": "Pretending to pick [something] up", | |
| "69": "Pretending to poke [something]", | |
| "70": "Pretending to pour [something] out of [something], but [something] is empty", | |
| "71": "Pretending to put [something] behind [something]", | |
| "72": "Pretending to put [something] into [something]", | |
| "73": "Pretending to put [something] next to [something]", | |
| "74": "Pretending to put [something] on a surface", | |
| "75": "Pretending to put [something] onto [something]", | |
| "76": "Pretending to put [something] underneath [something]", | |
| "77": "Pretending to scoop [something] up with [something]", | |
| "78": "Pretending to spread air onto [something]", | |
| "79": "Pretending to sprinkle air onto [something]", | |
| "80": "Pretending to squeeze [something]", | |
| "81": "Pretending to take [something] from [somewhere]", | |
| "82": "Pretending to take [something] out of [something]", | |
| "83": "Pretending to throw [something]", | |
| "84": "Pretending to turn [something] upside down", | |
| "85": "Pulling [something] from behind of [something]", | |
| "86": "Pulling [something] from left to right", | |
| "87": "Pulling [something] from right to left", | |
| "88": "Pulling [something] onto [something]", | |
| "89": "Pulling [something] out of [something]", | |
| "90": "Pulling two ends of [something] but nothing happens", | |
| "91": "Pulling two ends of [something] so that it gets stretched", | |
| "92": "Pulling two ends of [something] so that it separates into two pieces", | |
| "93": "Pushing [something] from left to right", | |
| "94": "Pushing [something] from right to left", | |
| "95": "Pushing [something] off of [something]", | |
| "96": "Pushing [something] onto [something]", | |
| "97": "Pushing [something] so it spins", | |
| "98": "Pushing [something] so that it almost falls off but doesn't", | |
| "99": "Pushing [something] so that it falls off the table", | |
| "100": "Pushing [something] so that it slightly moves", | |
| "101": "Pushing [something] with [something]", | |
| "102": "Putting [number of] [something] onto [something]", | |
| "103": "Putting [something] and [something] on the table", | |
| "104": "Putting [something] behind [something]", | |
| "105": "Putting [something] in front of [something]", | |
| "106": "Putting [something] into [something]", | |
| "107": "Putting [something] next to [something]", | |
| "108": "Putting [something] on a flat surface without letting it roll", | |
| "109": "Putting [something] on a surface", | |
| "110": "Putting [something] on the edge of [something] so it is not supported and falls down", | |
| "111": "Putting [something] onto a slanted surface but it doesn't glide down", | |
| "112": "Putting [something] onto [something]", | |
| "113": "Putting [something] onto [something else that cannot support it] so it falls down", | |
| "114": "Putting [something similar to other things that are already on the table]", | |
| "115": "Putting [something] that can't roll onto a slanted surface, so it slides down", | |
| "116": "Putting [something] that can't roll onto a slanted surface, so it stays where it is", | |
| "117": "Putting [something that cannot actually stand upright] upright on the table, so it falls on its side", | |
| "118": "Putting [something] underneath [something]", | |
| "119": "Putting [something] upright on the table", | |
| "120": "Putting [something], [something] and [something] on the table", | |
| "121": "Removing [something], revealing [something] behind", | |
| "122": "Rolling [something] on a flat surface", | |
| "123": "Scooping [something] up with [something]", | |
| "124": "Showing a photo of [something] to the camera", | |
| "125": "Showing [something] behind [something]", | |
| "126": "Showing [something] next to [something]", | |
| "127": "Showing [something] on top of [something]", | |
| "128": "Showing [something] to the camera", | |
| "129": "Showing that [something] is empty", | |
| "130": "Showing that [something] is inside [something]", | |
| "131": "[Something] being deflected from [something]", | |
| "132": "[Something] colliding with [something] and both are being deflected", | |
| "133": "[Something] colliding with [something] and both come to a halt", | |
| "134": "[Something] falling like a feather or paper", | |
| "135": "[Something] falling like a rock", | |
| "136": "Spilling [something] behind [something]", | |
| "137": "Spilling [something] next to [something]", | |
| "138": "Spilling [something] onto [something]", | |
| "139": "Spinning [something] so it continues spinning", | |
| "140": "Spinning [something] that quickly stops spinning", | |
| "141": "Spreading [something] onto [something]", | |
| "142": "Sprinkling [something] onto [something]", | |
| "143": "Squeezing [something]", | |
| "144": "Stacking [number of] [something]", | |
| "145": "Stuffing [something] into [something]", | |
| "146": "Taking [one of many similar things on the table]", | |
| "147": "Taking [something] from [somewhere]", | |
| "148": "Taking [something] out of [something]", | |
| "149": "Tearing [something] into two pieces", | |
| "150": "Tearing [something] just a little bit", | |
| "151": "Throwing [something]", | |
| "152": "Throwing [something] against [something]", | |
| "153": "Throwing [something] in the air and catching it", | |
| "154": "Throwing [something] in the air and letting it fall", | |
| "155": "Throwing [something] onto a surface", | |
| "156": "Tilting [something] with [something] on it slightly so it doesn't fall down", | |
| "157": "Tilting [something] with [something] on it until it falls off", | |
| "158": "Tipping [something] over", | |
| "159": "Tipping [something] with [something in it] over, so [something in it] falls out", | |
| "160": "Touching (without moving) [part] of [something]", | |
| "161": "Trying but failing to attach [something] to [something] because it doesn't stick", | |
| "162": "Trying to bend [something unbendable] so nothing happens", | |
| "163": "Trying to pour [something] into [something], but missing so it spills next to it", | |
| "164": "Turning [something] upside down", | |
| "165": "Turning the camera downwards while filming [something]", | |
| "166": "Turning the camera left while filming [something]", | |
| "167": "Turning the camera right while filming [something]", | |
| "168": "Turning the camera upwards while filming [something]", | |
| "169": "Twisting (wringing) [something] wet until water comes out", | |
| "170": "Twisting [something]", | |
| "171": "Uncovering [something]", | |
| "172": "Unfolding [something]", | |
| "173": "Wiping [something] off of [something]" | |
| }, | |
| "image_size": 384, | |
| "in_chans": 3, | |
| "initializer_range": 0.02, | |
| "label2id": { | |
| "Approaching [something] with your camera": 0, | |
| "Attaching [something] to [something]": 1, | |
| "Bending [something] so that it deforms": 2, | |
| "Bending [something] until it breaks": 3, | |
| "Burying [something] in [something]": 4, | |
| "Closing [something]": 5, | |
| "Covering [something] with [something]": 6, | |
| "Digging [something] out of [something]": 7, | |
| "Dropping [something] behind [something]": 8, | |
| "Dropping [something] in front of [something]": 9, | |
| "Dropping [something] into [something]": 10, | |
| "Dropping [something] next to [something]": 11, | |
| "Dropping [something] onto [something]": 12, | |
| "Failing to put [something] into [something] because [something] does not fit": 13, | |
| "Folding [something]": 14, | |
| "Hitting [something] with [something]": 15, | |
| "Holding [something]": 16, | |
| "Holding [something] behind [something]": 17, | |
| "Holding [something] in front of [something]": 18, | |
| "Holding [something] next to [something]": 19, | |
| "Holding [something] over [something]": 20, | |
| "Laying [something] on the table on its side, not upright": 21, | |
| "Letting [something] roll along a flat surface": 22, | |
| "Letting [something] roll down a slanted surface": 23, | |
| "Letting [something] roll up a slanted surface, so it rolls back down": 24, | |
| "Lifting a surface with [something] on it but not enough for it to slide down": 25, | |
| "Lifting a surface with [something] on it until it starts sliding down": 26, | |
| "Lifting [something] up completely without letting it drop down": 27, | |
| "Lifting [something] up completely, then letting it drop down": 28, | |
| "Lifting [something] with [something] on it": 29, | |
| "Lifting up one end of [something] without letting it drop down": 30, | |
| "Lifting up one end of [something], then letting it drop down": 31, | |
| "Moving away from [something] with your camera": 32, | |
| "Moving [part] of [something]": 33, | |
| "Moving [something] across a surface until it falls down": 34, | |
| "Moving [something] across a surface without it falling down": 35, | |
| "Moving [something] and [something] away from each other": 36, | |
| "Moving [something] and [something] closer to each other": 37, | |
| "Moving [something] and [something] so they collide with each other": 38, | |
| "Moving [something] and [something] so they pass each other": 39, | |
| "Moving [something] away from [something]": 40, | |
| "Moving [something] away from the camera": 41, | |
| "Moving [something] closer to [something]": 42, | |
| "Moving [something] down": 43, | |
| "Moving [something] towards the camera": 44, | |
| "Moving [something] up": 45, | |
| "Opening [something]": 46, | |
| "Picking [something] up": 47, | |
| "Piling [something] up": 48, | |
| "Plugging [something] into [something]": 49, | |
| "Plugging [something] into [something] but pulling it right out as you remove your hand": 50, | |
| "Poking a hole into [some substance]": 51, | |
| "Poking a hole into [something soft]": 52, | |
| "Poking a stack of [something] so the stack collapses": 53, | |
| "Poking a stack of [something] without the stack collapsing": 54, | |
| "Poking [something] so it slightly moves": 55, | |
| "Poking [something] so lightly that it doesn't or almost doesn't move": 56, | |
| "Poking [something] so that it falls over": 57, | |
| "Poking [something] so that it spins around": 58, | |
| "Pouring [something] into [something]": 59, | |
| "Pouring [something] into [something] until it overflows": 60, | |
| "Pouring [something] onto [something]": 61, | |
| "Pouring [something] out of [something]": 62, | |
| "Pretending or failing to wipe [something] off of [something]": 63, | |
| "Pretending or trying and failing to twist [something]": 64, | |
| "Pretending to be tearing [something that is not tearable]": 65, | |
| "Pretending to close [something] without actually closing it": 66, | |
| "Pretending to open [something] without actually opening it": 67, | |
| "Pretending to pick [something] up": 68, | |
| "Pretending to poke [something]": 69, | |
| "Pretending to pour [something] out of [something], but [something] is empty": 70, | |
| "Pretending to put [something] behind [something]": 71, | |
| "Pretending to put [something] into [something]": 72, | |
| "Pretending to put [something] next to [something]": 73, | |
| "Pretending to put [something] on a surface": 74, | |
| "Pretending to put [something] onto [something]": 75, | |
| "Pretending to put [something] underneath [something]": 76, | |
| "Pretending to scoop [something] up with [something]": 77, | |
| "Pretending to spread air onto [something]": 78, | |
| "Pretending to sprinkle air onto [something]": 79, | |
| "Pretending to squeeze [something]": 80, | |
| "Pretending to take [something] from [somewhere]": 81, | |
| "Pretending to take [something] out of [something]": 82, | |
| "Pretending to throw [something]": 83, | |
| "Pretending to turn [something] upside down": 84, | |
| "Pulling [something] from behind of [something]": 85, | |
| "Pulling [something] from left to right": 86, | |
| "Pulling [something] from right to left": 87, | |
| "Pulling [something] onto [something]": 88, | |
| "Pulling [something] out of [something]": 89, | |
| "Pulling two ends of [something] but nothing happens": 90, | |
| "Pulling two ends of [something] so that it gets stretched": 91, | |
| "Pulling two ends of [something] so that it separates into two pieces": 92, | |
| "Pushing [something] from left to right": 93, | |
| "Pushing [something] from right to left": 94, | |
| "Pushing [something] off of [something]": 95, | |
| "Pushing [something] onto [something]": 96, | |
| "Pushing [something] so it spins": 97, | |
| "Pushing [something] so that it almost falls off but doesn't": 98, | |
| "Pushing [something] so that it falls off the table": 99, | |
| "Pushing [something] so that it slightly moves": 100, | |
| "Pushing [something] with [something]": 101, | |
| "Putting [number of] [something] onto [something]": 102, | |
| "Putting [something] and [something] on the table": 103, | |
| "Putting [something] behind [something]": 104, | |
| "Putting [something] in front of [something]": 105, | |
| "Putting [something] into [something]": 106, | |
| "Putting [something] next to [something]": 107, | |
| "Putting [something] on a flat surface without letting it roll": 108, | |
| "Putting [something] on a surface": 109, | |
| "Putting [something] on the edge of [something] so it is not supported and falls down": 110, | |
| "Putting [something] onto a slanted surface but it doesn't glide down": 111, | |
| "Putting [something] onto [something]": 112, | |
| "Putting [something] onto [something else that cannot support it] so it falls down": 113, | |
| "Putting [something similar to other things that are already on the table]": 114, | |
| "Putting [something] that can't roll onto a slanted surface, so it slides down": 115, | |
| "Putting [something] that can't roll onto a slanted surface, so it stays where it is": 116, | |
| "Putting [something that cannot actually stand upright] upright on the table, so it falls on its side": 117, | |
| "Putting [something] underneath [something]": 118, | |
| "Putting [something] upright on the table": 119, | |
| "Putting [something], [something] and [something] on the table": 120, | |
| "Removing [something], revealing [something] behind": 121, | |
| "Rolling [something] on a flat surface": 122, | |
| "Scooping [something] up with [something]": 123, | |
| "Showing a photo of [something] to the camera": 124, | |
| "Showing [something] behind [something]": 125, | |
| "Showing [something] next to [something]": 126, | |
| "Showing [something] on top of [something]": 127, | |
| "Showing [something] to the camera": 128, | |
| "Showing that [something] is empty": 129, | |
| "Showing that [something] is inside [something]": 130, | |
| "[Something] being deflected from [something]": 131, | |
| "[Something] colliding with [something] and both are being deflected": 132, | |
| "[Something] colliding with [something] and both come to a halt": 133, | |
| "[Something] falling like a feather or paper": 134, | |
| "[Something] falling like a rock": 135, | |
| "Spilling [something] behind [something]": 136, | |
| "Spilling [something] next to [something]": 137, | |
| "Spilling [something] onto [something]": 138, | |
| "Spinning [something] so it continues spinning": 139, | |
| "Spinning [something] that quickly stops spinning": 140, | |
| "Spreading [something] onto [something]": 141, | |
| "Sprinkling [something] onto [something]": 142, | |
| "Squeezing [something]": 143, | |
| "Stacking [number of] [something]": 144, | |
| "Stuffing [something] into [something]": 145, | |
| "Taking [one of many similar things on the table]": 146, | |
| "Taking [something] from [somewhere]": 147, | |
| "Taking [something] out of [something]": 148, | |
| "Tearing [something] into two pieces": 149, | |
| "Tearing [something] just a little bit": 150, | |
| "Throwing [something]": 151, | |
| "Throwing [something] against [something]": 152, | |
| "Throwing [something] in the air and catching it": 153, | |
| "Throwing [something] in the air and letting it fall": 154, | |
| "Throwing [something] onto a surface": 155, | |
| "Tilting [something] with [something] on it slightly so it doesn't fall down": 156, | |
| "Tilting [something] with [something] on it until it falls off": 157, | |
| "Tipping [something] over": 158, | |
| "Tipping [something] with [something in it] over, so [something in it] falls out": 159, | |
| "Touching (without moving) [part] of [something]": 160, | |
| "Trying but failing to attach [something] to [something] because it doesn't stick": 161, | |
| "Trying to bend [something unbendable] so nothing happens": 162, | |
| "Trying to pour [something] into [something], but missing so it spills next to it": 163, | |
| "Turning [something] upside down": 164, | |
| "Turning the camera downwards while filming [something]": 165, | |
| "Turning the camera left while filming [something]": 166, | |
| "Turning the camera right while filming [something]": 167, | |
| "Turning the camera upwards while filming [something]": 168, | |
| "Twisting (wringing) [something] wet until water comes out": 169, | |
| "Twisting [something]": 170, | |
| "Uncovering [something]": 171, | |
| "Unfolding [something]": 172, | |
| "Wiping [something] off of [something]": 173 | |
| }, | |
| "layer_norm_eps": 1e-06, | |
| "mlp_ratio": 4.363636363636363, | |
| "model_type": "vjepa2", | |
| "num_attention_heads": 22, | |
| "num_hidden_layers": 40, | |
| "num_pooler_layers": 3, | |
| "patch_size": 16, | |
| "pred_hidden_size": 384, | |
| "pred_mlp_ratio": 4.0, | |
| "pred_num_attention_heads": 12, | |
| "pred_num_hidden_layers": 12, | |
| "pred_num_mask_tokens": 10, | |
| "pred_zero_init_mask_tokens": true, | |
| "qkv_bias": true, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.53.0.dev0", | |
| "tubelet_size": 2, | |
| "wide_SiLU": true | |
| } | |