{ "activation_dropout": 0.0, "activation_fn": "gelu", "architectures": [ "BEiT3ForVietnameseVisualQuestionAnswering" ], "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_vivqa.ViVQAConfig", "AutoModel": "modeling_vivqa.BEiT3ForVietnameseVisualQuestionAnswering" }, "bert_init": false, "checkpoint_activations": false, "ddp_rank": 0, "deepnorm": false, "drop_path_rate": 0.0, "dropout": 0.0, "encoder_attention_heads": 6, "encoder_embed_dim": 768, "encoder_ffn_embed_dim": 3072, "encoder_layers": 6, "encoder_normalize_before": true, "fsdp": false, "id2label": { "0": "hai", "1": "ba", "2": "b\u1ed1n", "3": "m\u00e0u tr\u1eafng", "4": "m\u00e0u \u0111\u1ecf", "5": "m\u00e0u xanh d\u01b0\u01a1ng", "6": "m\u00e0u \u0111en", "7": "m\u00e0u xanh l\u00e1", "8": "ph\u00f2ng", "9": "m\u00e0u v\u00e0ng", "10": "ph\u00f2ng b\u1ebfp", "11": "m\u00e0u n\u00e2u", "12": "ph\u00f2ng t\u1eafm", "13": "m\u00e0u cam", "14": "gi\u01b0\u1eddng", "15": "con m\u00e8o", "16": "h\u01b0\u01a1u cao c\u1ed5", "17": "m\u00e1y bay", "18": "g\u01b0\u01a1ng", "19": "n\u0103m", "20": "con chim", "21": "m\u00e0u x\u00e1m", "22": "m\u00e0u t\u00eda", "23": "con ch\u00f3", "24": "con thuy\u1ec1n", "25": "g\u1ea5u", "26": "xe \u00f4 t\u00f4", "27": "l\u1ecd c\u1eafm hoa", "28": "con voi", "29": "m\u1ed9t", "30": "con ng\u1ef1a", "31": "c\u00e1i gh\u1ebf", "32": "xe m\u00e1y", "33": "xe t\u1ea3i", "34": "t\u00e0u h\u1ecfa", "35": "xe bu\u00fdt", "36": "\u0111\u01b0\u1eddng ph\u1ed1", "37": "ch\u1eadu", "38": "h\u1ed9p", "39": "b\u00e1t", "40": "pizza", "41": "xe \u0111\u1ea1p", "42": "chu\u1ed3ng", "43": "con b\u00f2", "44": "vali", "45": "b\u00e1nh", "46": "\u0111\u1ed3ng h\u1ed3", "47": "s\u00e1u", "48": "di\u1ec1u", "49": "b\u0103ng gh\u1ebf", "50": "donut", "51": "nh\u00e0 v\u1ec7 sinh", "52": "l\u00f2 vi s\u00f3ng", "53": "sandwich", "54": "ng\u1ef1a v\u1eb1n", "55": "tr\u1ea1m", "56": "chi\u1ebfc \u00f4", "57": "ph\u00f2ng ng\u1ee7", "58": "ng\u1ef1a r\u1eb1n",
"59": "\u0111\u0129a \u0103n", "60": "v\u00f2i", "61": "\u0111i\u1ec7n tho\u1ea1i", "62": "con c\u1eebu", "63": "t\u00f2a nh\u00e0", "64": "v\u00e1n tr\u01b0\u1ee3t", "65": "c\u1eeda s\u1ed5", "66": "c\u1eeda h\u00e0ng", "67": "t\u00f2a th\u00e1p", "68": "b\u1ed3n t\u1eafm", "69": "c\u00e1i r\u1ed5", "70": "c\u00e2y", "71": "m\u00e1y vi t\u00ednh", "72": "qu\u00e1n \u0103n", "73": "ga ra", "74": "ch\u1ea3o", "75": "v\u01b0\u1eddn b\u00e1ch th\u00fa", "76": "nh\u00e0 \u1edf", "77": "xe \u0111\u1ea9y", "78": "laptop", "79": "xe l\u1eeda", "80": "b\u00f4ng hoa", "81": "v\u00e1n l\u01b0\u1edbt s\u00f3ng", "82": "c\u00e1i t\u00fai", "83": "t\u1ee7 \u0111\u00e1", "84": "qu\u1ea3 b\u00f3ng", "85": "chu\u1ed1i", "86": "s\u00e2n bay", "87": "v\u0103n ph\u00f2ng", "88": "th\u00f9ng ch\u1ee9a", "89": "n\u00fai", "90": "c\u00e1i b\u00e0n", "91": "tr\u01b0\u1ee3t tuy\u1ebft", "92": "c\u00e0 v\u1ea1t", "93": "h\u1ed3 b\u01a1i", "94": "b\u00e3i c\u1ecf", "95": "b\u1ea3y", "96": "m\u00f3n \u0103n", "97": "\u0111\u01b0\u1eddng b\u1ed9", "98": "xe", "99": "n\u00f3n", "100": "\u0111\u1ed9ng c\u01a1", "101": "c\u00e1i m\u00e2m", "102": "g\u1eady", "103": "g\u1ea5u tr\u00fac", "104": "c\u1eeda ti\u1ec7m", "105": "con v\u1ecbt", "106": "l\u1ed3ng", "107": "t\u01b0\u1eddng", "108": "c\u00e1i n\u1ed3i", "109": "t\u1ee7 l\u1ea1nh", "110": "c\u1eeda", "111": "t\u00e1ch", "112": "b\u1ee9c \u1ea3nh", "113": "s\u00e2n v\u01b0\u1eddn", "114": "\u0111\u1ed3i", "115": "b\u1eefa \u0103n", "116": "s\u00e2n v\u1eadn \u0111\u1ed9ng", "117": "d\u0129a nh\u1ef1a", "118": "ph\u01b0\u01a1ng ti\u1ec7n giao th\u00f4ng", "119": "m\u00e1y xay", "120": "\u0111\u1ed3 ch\u01a1i", "121": "m\u0169", "122": "rau", "123": "\u00e1o vest", "124": "v\u00f2i hoa sen", "125": "b\u00e0n ch\u1ea3i", "126": "c\u00e1i k\u1ec7", "127": "\u0111\u01b0\u1eddng", "128": "xe l\u0103n", "129": "c\u00e0 r\u1ed1t", "130": "xe c\u1ed9", "131": "th\u00e2n c\u00e2y", "132": "m\u00e1y \u1ea3nh", "133": "chai", "134": "\u00f4 c\u1eeda", "135": "s\u00e2n",
"136": "b\u1ebfn du thuy\u1ec1n", "137": "dao", "138": "xe tay ga", "139": "qu\u00e1n bar", "140": "th\u01b0 vi\u1ec7n", "141": "h\u00e0nh l\u00fd", "142": "b\u1edd bi\u1ec3n", "143": "t\u00e1m", "144": "c\u00e1i l\u1ecd", "145": "m\u1eb7t tr\u1eddi", "146": "\u00e1o s\u01a1 mi", "147": "qu\u1ea7y t\u00ednh ti\u1ec1n", "148": "\u0111\u01b0\u1eddng s\u1eaft", "149": "b\u1ea7u tr\u1eddi", "150": "chu\u1ed9t", "151": "r\u00e0o ch\u1eafn", "152": "\u1ea3nh ch\u1ee5p", "153": "balo", "154": "b\u1ea3o t\u00e0ng", "155": "qu\u1ea3 t\u00e1o", "156": "hoa qu\u1ea3", "157": "b\u1ee9c t\u01b0\u1ee3ng", "158": "m\u00e1y t\u00ednh", "159": "c\u00e1c t\u00f2a nh\u00e0", "160": "ch\u00e9n \u0111\u0129a", "161": "m\u01b0\u1eddi", "162": "ch\u00edn", "163": "gi\u1ea5y b\u1ea1c", "164": "s\u00e0n nh\u00e0", "165": "chu\u1ed3ng tr\u1ea1i", "166": "l\u1edbp h\u1ecdc", "167": "kho", "168": "b\u1ebfp", "169": "b\u1ea3ng", "170": "gia s\u00fac", "171": "th\u1ecbt", "172": "b\u1ed3n ti\u1ec3u", "173": "t\u1ea1p d\u1ec1", "174": "c\u00e1i l\u1ec1u", "175": "g\u0103ng tay", "176": "h\u00e0nh lang", "177": "l\u00e1", "178": "t\u00fai", "179": "h\u1ea3i \u00e2u", "180": "v\u1ee3t", "181": "b\u00e0n ph\u00edm", "182": "s\u00f4 c\u00f4 la", "183": "r\u01b0\u1ee3u", "184": "t\u00e1o", "185": "gian h\u00e0ng", "186": "xe \u0111i\u1ec7n ng\u1ea7m", "187": "m\u00e1y s\u1ea5y kh\u00f4", "188": "toa xe", "189": "trang thi\u1ebft b\u1ecb", "190": "c\u1ed7 m\u00e1y", "191": "n\u01b0\u1edbc", "192": "c\u00e2y k\u00e9o", "193": "ng\u0103n k\u00e9o", "194": "v\u1ea1ch k\u1ebb \u0111\u01b0\u1eddng", "195": "b\u00e1nh ng\u1ecdt", "196": "l\u1ed1i \u0111i", "197": "t\u00e0u", "198": "\u0111\u01b0\u1eddng \u0111i b\u1ed9", "199": "d\u0129a", "200": "con v\u1eb9t", "201": "l\u00e1 c\u1edd", "202": "kh\u0103n", "203": "chung c\u01b0", "204": "h\u1ed3", "205": "ca n\u00f4", "206": "gi\u00e1 \u0111\u1ee1", "207": "nh\u1eefng qu\u1ea3 cam", "208": "b\u1eefa tr\u01b0a", "209": "k\u00ednh \u0111eo", "210":
"cupcake", "211": "\u0111\u01b0\u1eddng ray", "212": "b\u1ed9 \u0111\u1ed3", "213": "h\u00e0ng ho\u00e1", "214": "nh\u1eefng b\u1ee9c \u1ea3nh", "215": "c\u00e1i v\u00ed", "216": "c\u1eebu", "217": "ng\u01b0\u1eddi gi\u1eef", "218": "b\u1ee9c tranh", "219": "c\u1ea7u", "220": "nhi\u1ec1u c\u00e1i gh\u1ebf", "221": "b\u00f4ng c\u1ea3i xanh", "222": "b\u1eefa \u0103n t\u1ed1i", "223": "v\u1ebd tranh l\u00ean t\u01b0\u1eddng", "224": "thuy\u1ec1n bu\u1ed3m", "225": "\u0111i v\u0103ng", "226": "s\u00e2n kh\u1ea5u", "227": "n\u1ebfn", "228": "bu\u1ed3ng", "229": "c\u00e1i th\u00eca", "230": "c\u1ecf kh\u00f4", "231": "con kh\u1ec9", "232": "t\u01b0\u1ee3ng \u0111\u00e0i", "233": "t\u1ee7 \u0111\u00f4ng", "234": "hoa h\u1ed3ng", "235": "chim b\u1ed3 c\u00e2u", "236": "hay", "237": "g\u1ea7u m\u00fac", "238": "b\u00fai t\u00f3c", "239": "m\u00f3ng vu\u1ed1t", "240": "xe \u0111i\u1ec7n", "241": "\u0111\u0129a", "242": "m\u00e0n", "243": "\u00e1o kho\u00e1c", "244": "m\u1eb7t n\u1ea1", "245": "\u0111\u1ed3 u\u1ed1ng", "246": "b\u00f2 \u0111\u1ef1c", "247": "c\u00e1i n\u0129a", "248": "\u0111\u01b0\u1eddng \u1ed1ng", "249": "n\u01b0\u1edbc ti\u1ec3u", "250": "ly", "251": "\u0111\u00e8n \u0111\u1ec3 b\u00e0n", "252": "\u0111\u1ed3 n\u1ed9i th\u1ea5t", "253": "m\u00e1i ch\u00e8o", "254": "\u0111\u1ea7u m\u00e1y", "255": "\u0111\u1ea7m", "256": "m\u0169 l\u01b0\u1ee1i trai", "257": "truy\u1ec1n h\u00ecnh", "258": "ph\u00f4 mai", "259": "c\u00e0 ph\u00ea", "260": "b\u1ebfn t\u00e0u", "261": "con d\u00ea", "262": "c\u1eeda ra v\u00e0o", "263": "k\u00fd t\u00ean", "264": "thi\u1ebft b\u1ecb", "265": "b\u00ecnh hoa", "266": "bia", "267": "con d\u1ed1c", "268": "\u00e1o cho\u00e0ng", "269": "m\u00f3n tr\u00e1ng mi\u1ec7ng", "270": "c\u00e2y s\u00e0o", "271": "thu\u1ed1c l\u00e1", "272": "m\u1eb7t", "273": "k\u00ednh r\u00e2m", "274": "\u0111i\u00eau kh\u1eafc", "275": "nh\u00e0", "276": "rau qu\u1ea3", "277": "tr\u00e1i c\u00e2y", "278": "qu\u1ea3 cam",
"279": "\u0111\u0129a n\u00e9m", "280": "ba lan", "281": "c\u00e2y g\u1eady", "282": "s\u1eefa", "283": "h\u1ed9p \u0111\u1ef1ng", "284": "khung", "285": "ngo\u00e0i tr\u1eddi", "286": "\u0111o\u1ea1n phim gi\u1edbi thi\u1ec7u", "287": "c\u1edd", "288": "th\u00f9ng", "289": "l\u00f2 s\u01b0\u1edfi", "290": "l\u00e1t c\u1eaft", "291": "b\u1eafp ch\u00e2n", "292": "c\u00fan y\u00eau", "293": "ng\u00e2n h\u00e0ng", "294": "rau x\u00e0 l\u00e1ch", "295": "xa l\u1ed9", "296": "g\u00e0", "297": "qu\u1ea7n short", "298": "v\u00f2i n\u01b0\u1edbc", "299": "m\u0169 b\u1ea3o hi\u1ec3m", "300": "c\u00f4ng c\u1ee5", "301": "qu\u1ea3 cam ", "302": "v\u00e1n tr\u01b0\u1ee3t tuy\u1ebft", "303": "g\u1ea1ch", "304": "ch\u00ecm xu\u1ed1ng", "305": "kh\u0103n t\u1eafm", "306": "l\u00e1t g\u1ea1ch", "307": "ng\u0103n", "308": "b\u1ea3ng hi\u1ec7u", "309": "l\u0103n tr\u00f2n", "310": "hotdog", "311": "c\u1ecf", "312": "b\u00ecnh", "313": "b\u00ean", "314": "t\u00e0u ho\u1ea3", "315": "b\u00e1nh xe", "316": "lon", "317": "nh\u00e0 t\u1eafm", "318": "\u0111\u01b0\u1eddng \u0111ua", "319": "m\u00e0u s\u1eafc", "320": "bao b\u00ec", "321": "th\u00e0nh ph\u1ea7n", "322": "chim \u01b0ng", "323": "\u0111i\u1ec3m t\u00e2m", "324": "d\u0129a ", "325": "b\u00e0n ch\u1ea3i \u0111\u00e1nh r\u0103ng", "326": "h\u00e0ng h\u00f3a", "327": "pug", "328": "h\u1ed9p s\u1ed1", "329": "c\u00e1", "330": "gi\u1ecf", "331": "gh\u1ebf s\u00f4 pha", "332": "qu\u1ea7n \u00e1o", "333": "tr\u01b0\u1eddng h\u1ee3p", "334": "b\u00f2", "335": "v\u00f4 tuy\u1ebfn", "336": "con thoi", "337": "theo d\u00f5i", "338": "\u00e1o ba l\u1ed7", "339": "d\u00f2ng s\u00f4ng", "340": "g\u00e0 t\u00e2y", "341": "d\u1ea5u hi\u1ec7u", "342": "m\u00e8o con", "343": "m\u1eaft", "344": "\u0111\u01b0a \u0111\u00f3n", "345": "con heo", "346": "ngo\u00e0i", "347": "\u0111\u1ed3ng ph\u1ee5c", "348": "m\u00e1y bay tr\u1ef1c th\u0103ng", "349": "\u0111\u1ea1i d\u01b0\u01a1ng", "350": "b\u1ee9c m\u00e0n", "351": "cam", "352": "b\u00e1nh hamburger" }, "img_size":
224, "in_chans": 3, "label2id": null, "layernorm_embedding": false, "layernorm_eps": 1e-05, "max_rel_pos": 0, "max_source_positions": 1024, "model_type": "vivqa", "moe_eval_capacity_token_fraction": 0.25, "moe_expert_count": 0, "moe_freq": 0, "moe_gating_use_fp32": true, "moe_normalize_gate_prob_before_dropping": false, "moe_second_expert_policy": "random", "moe_top1_expert": false, "multiway": true, "no_output_layer": true, "no_scale_embedding": true, "normalize_output": true, "patch_size": 16, "rel_pos_buckets": 0, "share_encoder_input_output_embed": false, "subln": true, "torch_dtype": "float32", "transformers_version": "4.36.2", "use_xmoe": false, "vocab_size": -1, "xpos_rel_pos": false, "xpos_scale_base": 512 }