{ "_name_or_path": "facebook/wav2vec2-base", "activation_dropout": 0.0, "adapter_attn_dim": null, "adapter_kernel_size": 3, "adapter_stride": 2, "add_adapter": false, "apply_spec_augment": true, "architectures": [ "Wav2Vec2ForSequenceClassification" ], "attention_dropout": 0.1, "bos_token_id": 1, "classifier_proj_size": 256, "codevector_dim": 256, "contrastive_logits_temperature": 0.1, "conv_bias": false, "conv_dim": [ 512, 512, 512, 512, 512, 512, 512 ], "conv_kernel": [ 10, 3, 3, 3, 3, 2, 2 ], "conv_stride": [ 5, 2, 2, 2, 2, 2, 2 ], "ctc_loss_reduction": "sum", "ctc_zero_infinity": false, "diversity_loss_weight": 0.1, "do_stable_layer_norm": false, "eos_token_id": 2, "feat_extract_activation": "gelu", "feat_extract_norm": "group", "feat_proj_dropout": 0.1, "feat_quantizer_dropout": 0.0, "final_dropout": 0.0, "freeze_feat_extract_train": true, "hidden_act": "gelu", "hidden_dropout": 0.1, "hidden_size": 768, "id2label": { "0": "asbfly", "1": "ashdro1", "2": "ashpri1", "3": "asikoe2", "4": "aspfly1", "5": "aspswi1", "6": "barswa", "7": "bcnher", "8": "bkcbul1", "9": "bkrfla1", "10": "bkskit1", "11": "bkwsti", "12": "blakit1", "13": "blhori1", "14": "blnmon1", "15": "blrwar1", "16": "brcful1", "17": "brnhao1", "18": "brwjac1", "19": "brwowl1", "20": "bwfshr1", "21": "comfla1", "22": "comgre", "23": "comior1", "24": "comkin1", "25": "commoo3", "26": "commyn", "27": "comros", "28": "comsan", "29": "comtai1", "30": "copbar1", "31": "crseag1", "32": "eaywag1", "33": "eucdov", "34": "eurcoo", "35": "gloibi", "36": "goflea1", "37": "graher1", "38": "grecou1", "39": "greegr", "40": "grejun2", "41": "grewar3", "42": "grnsan", "43": "grnwar1", "44": "grtdro1", "45": "grynig2", "46": "grywag", "47": "gybpri1", "48": "gyhcaf1", "49": "hoopoe", "50": "houspa", "51": "inbrob1", "52": "indpit1", "53": "indrob1", "54": "indrol2", "55": "inpher1", "56": "insowl1", "57": "junbab2", "58": "junmyn1", "59": "junowl1", "60": "kenplo1", "61": "labcro1", "62": "laudov1", "63": "lirplo", "64": "litegr", "65": "litgre1", "66": "litspi1", "67": "litswi1", "68": "marsan", "69": "pabflo1", "70": "piebus1", "71": "piekin1", "72": "placuc3", "73": "plapri1", "74": "plhpar1", "75": "purher1", "76": "pursun4", "77": "putbab1", "78": "rerswa1", "79": "revbul", "80": "rewlap1", "81": "rocpig", "82": "rorpar", "83": "rossta2", "84": "ruftre2", "85": "spepic1", "86": "spodov", "87": "stbkin1", "88": "thbwar1", "89": "tibfly3", "90": "vefnut1", "91": "vehpar1", "92": "wemhar1", "93": "whbwat1", "94": "whiter2", "95": "whtkin2", "96": "woosan", "97": "zitcis1" }, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "asbfly": 0, "ashdro1": 1, "ashpri1": 2, "asikoe2": 3, "aspfly1": 4, "aspswi1": 5, "barswa": 6, "bcnher": 7, "bkcbul1": 8, "bkrfla1": 9, "bkskit1": 10, "bkwsti": 11, "blakit1": 12, "blhori1": 13, "blnmon1": 14, "blrwar1": 15, "brcful1": 16, "brnhao1": 17, "brwjac1": 18, "brwowl1": 19, "bwfshr1": 20, "comfla1": 21, "comgre": 22, "comior1": 23, "comkin1": 24, "commoo3": 25, "commyn": 26, "comros": 27, "comsan": 28, "comtai1": 29, "copbar1": 30, "crseag1": 31, "eaywag1": 32, "eucdov": 33, "eurcoo": 34, "gloibi": 35, "goflea1": 36, "graher1": 37, "grecou1": 38, "greegr": 39, "grejun2": 40, "grewar3": 41, "grnsan": 42, "grnwar1": 43, "grtdro1": 44, "grynig2": 45, "grywag": 46, "gybpri1": 47, "gyhcaf1": 48, "hoopoe": 49, "houspa": 50, "inbrob1": 51, "indpit1": 52, "indrob1": 53, "indrol2": 54, "inpher1": 55, "insowl1": 56, "junbab2": 57, "junmyn1": 58, "junowl1": 59, "kenplo1": 60, "labcro1": 61, "laudov1": 62, "lirplo": 63, "litegr": 64, "litgre1": 65, "litspi1": 66, "litswi1": 67, "marsan": 68, "pabflo1": 69, "piebus1": 70, "piekin1": 71, "placuc3": 72, "plapri1": 73, "plhpar1": 74, "purher1": 75, "pursun4": 76, "putbab1": 77, "rerswa1": 78, "revbul": 79, "rewlap1": 80, "rocpig": 81, "rorpar": 82, "rossta2": 83, "ruftre2": 84, "spepic1": 85, "spodov": 86, "stbkin1": 87, "thbwar1": 88, "tibfly3": 89, "vefnut1": 90, "vehpar1": 91, "wemhar1": 92, "whbwat1": 93, "whiter2": 94, "whtkin2": 95, "woosan": 96, "zitcis1": 97 }, "layer_norm_eps": 1e-05, "layerdrop": 0.0, "mask_channel_length": 10, "mask_channel_min_space": 1, "mask_channel_other": 0.0, "mask_channel_prob": 0.0, "mask_channel_selection": "static", "mask_feature_length": 10, "mask_feature_min_masks": 0, "mask_feature_prob": 0.0, "mask_time_length": 10, "mask_time_min_masks": 2, "mask_time_min_space": 1, "mask_time_other": 0.0, "mask_time_prob": 0.05, "mask_time_selection": "static", "model_type": "wav2vec2", "no_mask_channel_overlap": false, "no_mask_time_overlap": false, "num_adapter_layers": 3, "num_attention_heads": 12, "num_codevector_groups": 2, "num_codevectors_per_group": 320, "num_conv_pos_embedding_groups": 16, "num_conv_pos_embeddings": 128, "num_feat_extract_layers": 7, "num_hidden_layers": 12, "num_negatives": 100, "output_hidden_size": 768, "pad_token_id": 0, "proj_codevector_dim": 256, "tdnn_dilation": [ 1, 2, 3, 1, 1 ], "tdnn_dim": [ 512, 512, 512, 512, 1500 ], "tdnn_kernel": [ 5, 3, 3, 1, 1 ], "torch_dtype": "float32", "transformers_version": "4.40.0", "use_weighted_layer_sum": false, "vocab_size": 32, "xvector_output_dim": 512 }