| { | |
| "activation_dropout": 0.0, | |
| "activation_fn": "gelu", | |
| "architectures": [ | |
| "BEiT3ForVietnameseVisualQuestionAnswering" | |
| ], | |
| "attention_dropout": 0.0, | |
| "auto_map": { | |
| "AutoConfig": "configuration_vivqa.ViVQAConfig", | |
| "AutoModel": "modeling_vivqa.BEiT3ForVietnameseVisualQuestionAnswering" | |
| }, | |
| "bert_init": false, | |
| "checkpoint_activations": false, | |
| "ddp_rank": 0, | |
| "deepnorm": false, | |
| "drop_path_rate": 0.0, | |
| "dropout": 0.0, | |
| "encoder_attention_heads": 6, | |
| "encoder_embed_dim": 768, | |
| "encoder_ffn_embed_dim": 3072, | |
| "encoder_layers": 6, | |
| "encoder_normalize_before": true, | |
| "fsdp": false, | |
| "id2label": { | |
| "0": "hai", | |
| "1": "ba", | |
| "2": "b\u1ed1n", | |
| "3": "m\u00e0u tr\u1eafng", | |
| "4": "m\u00e0u \u0111\u1ecf", | |
| "5": "m\u00e0u xanh d\u01b0\u01a1ng", | |
| "6": "m\u00e0u \u0111en", | |
| "7": "m\u00e0u xanh l\u00e1", | |
| "8": "ph\u00f2ng", | |
| "9": "m\u00e0u v\u00e0ng", | |
| "10": "ph\u00f2ng b\u1ebfp", | |
| "11": "m\u00e0u n\u00e2u", | |
| "12": "ph\u00f2ng t\u1eafm", | |
| "13": "m\u00e0u cam", | |
| "14": "gi\u01b0\u1eddng", | |
| "15": "con m\u00e8o", | |
| "16": "h\u01b0\u01a1u cao c\u1ed5", | |
| "17": "m\u00e1y bay", | |
| "18": "g\u01b0\u01a1ng", | |
| "19": "n\u0103m", | |
| "20": "con chim", | |
| "21": "m\u00e0u x\u00e1m", | |
| "22": "m\u00e0u t\u00eda", | |
| "23": "con ch\u00f3", | |
| "24": "con thuy\u1ec1n", | |
| "25": "g\u1ea5u", | |
| "26": "xe \u00f4 t\u00f4", | |
| "27": "l\u1ecd c\u1eafm hoa", | |
| "28": "con voi", | |
| "29": "m\u1ed9t", | |
| "30": "con ng\u1ef1a", | |
| "31": "c\u00e1i gh\u1ebf", | |
| "32": "xe m\u00e1y", | |
| "33": "xe t\u1ea3i", | |
| "34": "t\u00e0u h\u1ecfa", | |
| "35": "xe bu\u00fdt", | |
| "36": "\u0111\u01b0\u1eddng ph\u1ed1", | |
| "37": "ch\u1eadu", | |
| "38": "h\u1ed9p", | |
| "39": "b\u00e1t", | |
| "40": "pizza", | |
| "41": "xe \u0111\u1ea1p", | |
| "42": "chu\u1ed3ng", | |
| "43": "con b\u00f2", | |
| "44": "vali", | |
| "45": "b\u00e1nh", | |
| "46": "\u0111\u1ed3ng h\u1ed3", | |
| "47": "s\u00e1u", | |
| "48": "di\u1ec1u", | |
| "49": "b\u0103ng gh\u1ebf", | |
| "50": "donut", | |
| "51": "nh\u00e0 v\u1ec7 sinh", | |
| "52": "l\u00f2 vi s\u00f3ng", | |
| "53": "sandwich", | |
| "54": "ng\u1ef1a v\u1eb1n", | |
| "55": "tr\u1ea1m", | |
| "56": "chi\u1ebfc \u00f4", | |
| "57": "ph\u00f2ng ng\u1ee7", | |
| "58": "ng\u1ef1a r\u1eb1n", | |
| "59": "\u0111\u0129a \u0103n", | |
| "60": "v\u00f2i", | |
| "61": "\u0111i\u1ec7n tho\u1ea1i", | |
| "62": "con c\u1eebu", | |
| "63": "t\u00f2a nh\u00e0", | |
| "64": "v\u00e1n tr\u01b0\u1ee3t", | |
| "65": "c\u1eeda s\u1ed5", | |
| "66": "c\u1eeda h\u00e0ng", | |
| "67": "t\u00f2a th\u00e1p", | |
| "68": "b\u1ed3n t\u1eafm", | |
| "69": "c\u00e1i r\u1ed5", | |
| "70": "c\u00e2y", | |
| "71": "m\u00e1y vi t\u00ednh", | |
| "72": "qu\u00e1n \u0103n", | |
| "73": "ga ra", | |
| "74": "ch\u1ea3o", | |
| "75": "v\u01b0\u1eddn b\u00e1ch th\u00fa", | |
| "76": "nh\u00e0 \u1edf", | |
| "77": "xe \u0111\u1ea9y", | |
| "78": "laptop", | |
| "79": "xe l\u1eeda", | |
| "80": "b\u00f4ng hoa", | |
| "81": "v\u00e1n l\u01b0\u1edbt s\u00f3ng", | |
| "82": "c\u00e1i t\u00fai", | |
| "83": "t\u1ee7 \u0111\u00e1", | |
| "84": "qu\u1ea3 b\u00f3ng", | |
| "85": "chu\u1ed1i", | |
| "86": "s\u00e2n bay", | |
| "87": "v\u0103n ph\u00f2ng", | |
| "88": "th\u00f9ng ch\u1ee9a", | |
| "89": "n\u00fai", | |
| "90": "c\u00e1i b\u00e0n", | |
| "91": "tr\u01b0\u1ee3t tuy\u1ebft", | |
| "92": "c\u00e0 v\u1ea1t", | |
| "93": "h\u1ed3 b\u01a1i", | |
| "94": "b\u00e3i c\u1ecf", | |
| "95": "b\u1ea3y", | |
| "96": "m\u00f3n \u0103n", | |
| "97": "\u0111\u01b0\u1eddng b\u1ed9", | |
| "98": "xe", | |
| "99": "n\u00f3n", | |
| "100": "\u0111\u1ed9ng c\u01a1", | |
| "101": "c\u00e1i m\u00e2m", | |
| "102": "g\u1eady", | |
| "103": "g\u1ea5u tr\u00fac", | |
| "104": "c\u1eeda ti\u1ec7m", | |
| "105": "con v\u1ecbt", | |
| "106": "l\u1ed3ng", | |
| "107": "t\u01b0\u1eddng", | |
| "108": "c\u00e1i n\u1ed3i", | |
| "109": "t\u1ee7 l\u1ea1nh", | |
| "110": "c\u1eeda", | |
| "111": "t\u00e1ch", | |
| "112": "b\u1ee9c \u1ea3nh", | |
| "113": "s\u00e2n v\u01b0\u1eddn", | |
| "114": "\u0111\u1ed3i", | |
| "115": "b\u1eefa \u0103n", | |
| "116": "s\u00e2n v\u1eadn \u0111\u1ed9ng", | |
| "117": "d\u0129a nh\u1ef1a", | |
| "118": "ph\u01b0\u01a1ng ti\u1ec7n giao th\u00f4ng", | |
| "119": "m\u00e1y xay", | |
| "120": "\u0111\u1ed3 ch\u01a1i", | |
| "121": "m\u0169", | |
| "122": "rau", | |
| "123": "\u00e1o vest", | |
| "124": "v\u00f2i hoa sen", | |
| "125": "b\u00e0n ch\u1ea3i", | |
| "126": "c\u00e1i k\u1ec7", | |
| "127": "\u0111\u01b0\u1eddng", | |
| "128": "xe l\u0103n", | |
| "129": "c\u00e0 r\u1ed1t", | |
| "130": "xe c\u1ed9", | |
| "131": "th\u00e2n c\u00e2y", | |
| "132": "m\u00e1y \u1ea3nh", | |
| "133": "chai", | |
| "134": "\u00f4 c\u1eeda", | |
| "135": "s\u00e2n", | |
| "136": "b\u1ebfn du thuy\u1ec1n", | |
| "137": "dao", | |
| "138": "xe tay ga", | |
| "139": "qu\u00e1n bar", | |
| "140": "th\u01b0 vi\u1ec7n", | |
| "141": "h\u00e0nh l\u00fd", | |
| "142": "b\u1edd bi\u1ec3n", | |
| "143": "t\u00e1m", | |
| "144": "c\u00e1i l\u1ecd", | |
| "145": "m\u1eb7t tr\u1eddi", | |
| "146": "\u00e1o s\u01a1 mi", | |
| "147": "qu\u1ea7y t\u00ednh ti\u1ec1n", | |
| "148": "\u0111\u01b0\u1eddng s\u1eaft", | |
| "149": "b\u1ea7u tr\u1eddi", | |
| "150": "chu\u1ed9t", | |
| "151": "r\u00e0o ch\u1eafn", | |
| "152": "\u1ea3nh ch\u1ee5p", | |
| "153": "balo", | |
| "154": "b\u1ea3o t\u00e0ng", | |
| "155": "qu\u1ea3 t\u00e1o", | |
| "156": "hoa qu\u1ea3", | |
| "157": "b\u1ee9c t\u01b0\u1ee3ng", | |
| "158": "m\u00e1y t\u00ednh", | |
| "159": "c\u00e1c t\u00f2a nh\u00e0", | |
| "160": "ch\u00e9n \u0111\u0129a", | |
| "161": "m\u01b0\u1eddi", | |
| "162": "ch\u00edn", | |
| "163": "gi\u1ea5y b\u1ea1c", | |
| "164": "s\u00e0n nh\u00e0", | |
| "165": "chu\u1ed3ng tr\u1ea1i", | |
| "166": "l\u1edbp h\u1ecdc", | |
| "167": "kho", | |
| "168": "b\u1ebfp", | |
| "169": "b\u1ea3ng", | |
| "170": "gia s\u00fac", | |
| "171": "th\u1ecbt", | |
| "172": "b\u1ed3n ti\u1ec3u", | |
| "173": "t\u1ea1p d\u1ec1", | |
| "174": "c\u00e1i l\u1ec1u", | |
| "175": "g\u0103ng tay", | |
| "176": "h\u00e0nh lang", | |
| "177": "l\u00e1", | |
| "178": "t\u00fai", | |
| "179": "h\u1ea3i \u00e2u", | |
| "180": "v\u1ee3t", | |
| "181": "b\u00e0n ph\u00edm", | |
| "182": "s\u00f4 c\u00f4 la", | |
| "183": "r\u01b0\u1ee3u", | |
| "184": "t\u00e1o", | |
| "185": "gian h\u00e0ng", | |
| "186": "xe \u0111i\u1ec7n ng\u1ea7m", | |
| "187": "m\u00e1y s\u1ea5y kh\u00f4", | |
| "188": "toa xe", | |
| "189": "trang thi\u1ebft b\u1ecb", | |
| "190": "c\u1ed7 m\u00e1y", | |
| "191": "n\u01b0\u1edbc", | |
| "192": "c\u00e2y k\u00e9o", | |
| "193": "ng\u0103n k\u00e9o", | |
| "194": "v\u1ea1ch k\u1ebb \u0111\u01b0\u1eddng", | |
| "195": "b\u00e1nh ng\u1ecdt", | |
| "196": "l\u1ed1i \u0111i", | |
| "197": "t\u00e0u", | |
| "198": "\u0111\u01b0\u1eddng \u0111i b\u1ed9", | |
| "199": "d\u0129a", | |
| "200": "con v\u1eb9t", | |
| "201": "l\u00e1 c\u1edd", | |
| "202": "kh\u0103n", | |
| "203": "chung c\u01b0", | |
| "204": "h\u1ed3", | |
| "205": "ca n\u00f4", | |
| "206": "gi\u00e1 \u0111\u1ee1", | |
| "207": "nh\u1eefng qu\u1ea3 cam", | |
| "208": "b\u1eefa tr\u01b0a", | |
| "209": "k\u00ednh \u0111eo", | |
| "210": "cupcake", | |
| "211": "\u0111\u01b0\u1eddng ray", | |
| "212": "b\u1ed9 \u0111\u1ed3", | |
| "213": "h\u00e0ng ho\u00e1", | |
| "214": "nh\u1eefng b\u1ee9c \u1ea3nh", | |
| "215": "c\u00e1i v\u00ed", | |
| "216": "c\u1eebu", | |
| "217": "ng\u01b0\u1eddi gi\u1eef", | |
| "218": "b\u1ee9c tranh", | |
| "219": "c\u1ea7u", | |
| "220": "nhi\u1ec1u c\u00e1i gh\u1ebf", | |
| "221": "b\u00f4ng c\u1ea3i xanh", | |
| "222": "b\u1eefa \u0103n t\u1ed1i", | |
| "223": "v\u1ebd tranh l\u00ean t\u01b0\u1eddng", | |
| "224": "thuy\u1ec1n bu\u1ed3m", | |
| "225": "\u0111i v\u0103ng", | |
| "226": "s\u00e2n kh\u1ea5u", | |
| "227": "n\u1ebfn", | |
| "228": "bu\u1ed3ng", | |
| "229": "c\u00e1i th\u00eca", | |
| "230": "c\u1ecf kh\u00f4", | |
| "231": "con kh\u1ec9", | |
| "232": "t\u01b0\u1ee3ng \u0111\u00e0i", | |
| "233": "t\u1ee7 \u0111\u00f4ng", | |
| "234": "hoa h\u1ed3ng", | |
| "235": "chim b\u1ed3 c\u00e2u", | |
| "236": "hay", | |
| "237": "g\u1ea7u m\u00fac", | |
| "238": "b\u00fai t\u00f3c", | |
| "239": "m\u00f3ng vu\u1ed1t", | |
| "240": "xe \u0111i\u1ec7n", | |
| "241": "\u0111\u0129a", | |
| "242": "m\u00e0n", | |
| "243": "\u00e1o kho\u00e1c", | |
| "244": "m\u1eb7t n\u1ea1", | |
| "245": "\u0111\u1ed3 u\u1ed1ng", | |
| "246": "b\u00f2 \u0111\u1ef1c", | |
| "247": "c\u00e1i n\u0129a", | |
| "248": "\u0111\u01b0\u1eddng \u1ed1ng", | |
| "249": "n\u01b0\u1edbc ti\u1ec3u", | |
| "250": "ly", | |
| "251": "\u0111\u00e8n \u0111\u1ec3 b\u00e0n", | |
| "252": "\u0111\u1ed3 n\u1ed9i th\u1ea5t", | |
| "253": "m\u00e1i ch\u00e8o", | |
| "254": "\u0111\u1ea7u m\u00e1y", | |
| "255": "\u0111\u1ea7m", | |
| "256": "m\u0169 l\u01b0\u1ee1i trai", | |
| "257": "truy\u1ec1n h\u00ecnh", | |
| "258": "ph\u00f4 mai", | |
| "259": "c\u00e0 ph\u00ea", | |
| "260": "b\u1ebfn t\u00e0u", | |
| "261": "con d\u00ea", | |
| "262": "c\u1eeda ra v\u00e0o", | |
| "263": "k\u00fd t\u00ean", | |
| "264": "thi\u1ebft b\u1ecb", | |
| "265": "b\u00ecnh hoa", | |
| "266": "bia", | |
| "267": "con d\u1ed1c", | |
| "268": "\u00e1o cho\u00e0ng", | |
| "269": "m\u00f3n tr\u00e1ng mi\u1ec7ng", | |
| "270": "c\u00e2y s\u00e0o", | |
| "271": "thu\u1ed1c l\u00e1", | |
| "272": "m\u1eb7t", | |
| "273": "k\u00ednh r\u00e2m", | |
| "274": "\u0111i\u00eau kh\u1eafc", | |
| "275": "nh\u00e0", | |
| "276": "rau qu\u1ea3", | |
| "277": "tr\u00e1i c\u00e2y", | |
| "278": "qu\u1ea3 cam", | |
| "279": "\u0111\u0129a n\u00e9m", | |
| "280": "ba lan", | |
| "281": "c\u00e2y g\u1eady", | |
| "282": "s\u1eefa", | |
| "283": "h\u1ed9p \u0111\u1ef1ng", | |
| "284": "khung", | |
| "285": "ngo\u00e0i tr\u1eddi", | |
| "286": "\u0111o\u1ea1n phim gi\u1edbi thi\u1ec7u", | |
| "287": "c\u1edd", | |
| "288": "th\u00f9ng", | |
| "289": "l\u00f2 s\u01b0\u1edfi", | |
| "290": "l\u00e1t c\u1eaft", | |
| "291": "b\u1eafp ch\u00e2n", | |
| "292": "c\u00fan y\u00eau", | |
| "293": "ng\u00e2n h\u00e0ng", | |
| "294": "rau x\u00e0 l\u00e1ch", | |
| "295": "xa l\u1ed9", | |
| "296": "g\u00e0", | |
| "297": "qu\u1ea7n short", | |
| "298": "v\u00f2i n\u01b0\u1edbc", | |
| "299": "m\u0169 b\u1ea3o hi\u1ec3m", | |
| "300": "c\u00f4ng c\u1ee5", | |
| "301": "qu\u1ea3 cam ", | |
| "302": "v\u00e1n tr\u01b0\u1ee3t tuy\u1ebft", | |
| "303": "g\u1ea1ch", | |
| "304": "ch\u00ecm xu\u1ed1ng", | |
| "305": "kh\u0103n t\u1eafm", | |
| "306": "l\u00e1t g\u1ea1ch", | |
| "307": "ng\u0103n", | |
| "308": "b\u1ea3ng hi\u1ec7u", | |
| "309": "l\u0103n tr\u00f2n", | |
| "310": "hotdog", | |
| "311": "c\u1ecf", | |
| "312": "b\u00ecnh", | |
| "313": "b\u00ean", | |
| "314": "t\u00e0u ho\u1ea3", | |
| "315": "b\u00e1nh xe", | |
| "316": "lon", | |
| "317": "nh\u00e0 t\u1eafm", | |
| "318": "\u0111\u01b0\u1eddng \u0111ua", | |
| "319": "m\u00e0u s\u1eafc", | |
| "320": "bao b\u00ec", | |
| "321": "th\u00e0nh ph\u1ea7n", | |
| "322": "chim \u01b0ng", | |
| "323": "\u0111i\u1ec3m t\u00e2m", | |
| "324": "d\u0129a ", | |
| "325": "b\u00e0n ch\u1ea3i \u0111\u00e1nh r\u0103ng", | |
| "326": "h\u00e0ng h\u00f3a", | |
| "327": "pug", | |
| "328": "h\u1ed9p s\u1ed1", | |
| "329": "c\u00e1", | |
| "330": "gi\u1ecf", | |
| "331": "gh\u1ebf s\u00f4 pha", | |
| "332": "qu\u1ea7n \u00e1o", | |
| "333": "tr\u01b0\u1eddng h\u1ee3p", | |
| "334": "b\u00f2", | |
| "335": "v\u00f4 tuy\u1ebfn", | |
| "336": "con thoi", | |
| "337": "theo d\u00f5i", | |
| "338": "\u00e1o ba l\u1ed7", | |
| "339": "d\u00f2ng s\u00f4ng", | |
| "340": "g\u00e0 t\u00e2y", | |
| "341": "d\u1ea5u hi\u1ec7u", | |
| "342": "m\u00e8o con", | |
| "343": "m\u1eaft", | |
| "344": "\u0111\u01b0a \u0111\u00f3n", | |
| "345": "con heo", | |
| "346": "ngo\u00e0i", | |
| "347": "\u0111\u1ed3ng ph\u1ee5c", | |
| "348": "m\u00e1y bay tr\u1ef1c th\u0103ng", | |
| "349": "\u0111\u1ea1i d\u01b0\u01a1ng", | |
| "350": "b\u1ee9c m\u00e0n", | |
| "351": "cam", | |
| "352": "b\u00e1nh hamburger" | |
| }, | |
| "img_size": 224, | |
| "in_chans": 3, | |
| "label2id": null, | |
| "layernorm_embedding": false, | |
| "layernorm_eps": 1e-05, | |
| "max_rel_pos": 0, | |
| "max_source_positions": 1024, | |
| "model_type": "vivqa", | |
| "moe_eval_capacity_token_fraction": 0.25, | |
| "moe_expert_count": 0, | |
| "moe_freq": 0, | |
| "moe_gating_use_fp32": true, | |
| "moe_normalize_gate_prob_before_dropping": false, | |
| "moe_second_expert_policy": "random", | |
| "moe_top1_expert": false, | |
| "multiway": true, | |
| "no_output_layer": true, | |
| "no_scale_embedding": true, | |
| "normalize_output": true, | |
| "patch_size": 16, | |
| "rel_pos_buckets": 0, | |
| "share_encoder_input_output_embed": false, | |
| "subln": true, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.36.2", | |
| "use_xmoe": false, | |
| "vocab_size": -1, | |
| "xpos_rel_pos": false, | |
| "xpos_scale_base": 512 | |
| } | |