bird2vec / config.json
fordb's picture
Training in progress, epoch 0
f59da74 verified
{
"_name_or_path": "facebook/wav2vec2-base",
"activation_dropout": 0.0,
"adapter_attn_dim": null,
"adapter_kernel_size": 3,
"adapter_stride": 2,
"add_adapter": false,
"apply_spec_augment": true,
"architectures": [
"Wav2Vec2ForSequenceClassification"
],
"attention_dropout": 0.1,
"bos_token_id": 1,
"classifier_proj_size": 256,
"codevector_dim": 256,
"contrastive_logits_temperature": 0.1,
"conv_bias": false,
"conv_dim": [
512,
512,
512,
512,
512,
512,
512
],
"conv_kernel": [
10,
3,
3,
3,
3,
2,
2
],
"conv_stride": [
5,
2,
2,
2,
2,
2,
2
],
"ctc_loss_reduction": "sum",
"ctc_zero_infinity": false,
"diversity_loss_weight": 0.1,
"do_stable_layer_norm": false,
"eos_token_id": 2,
"feat_extract_activation": "gelu",
"feat_extract_norm": "group",
"feat_proj_dropout": 0.1,
"feat_quantizer_dropout": 0.0,
"final_dropout": 0.0,
"freeze_feat_extract_train": true,
"hidden_act": "gelu",
"hidden_dropout": 0.1,
"hidden_size": 768,
"id2label": {
"0": "asbfly",
"1": "ashdro1",
"2": "ashpri1",
"3": "asikoe2",
"4": "aspfly1",
"5": "aspswi1",
"6": "barswa",
"7": "bcnher",
"8": "bkcbul1",
"9": "bkrfla1",
"10": "bkskit1",
"11": "bkwsti",
"12": "blakit1",
"13": "blhori1",
"14": "blnmon1",
"15": "blrwar1",
"16": "brcful1",
"17": "brnhao1",
"18": "brwjac1",
"19": "brwowl1",
"20": "bwfshr1",
"21": "comfla1",
"22": "comgre",
"23": "comior1",
"24": "comkin1",
"25": "commoo3",
"26": "commyn",
"27": "comros",
"28": "comsan",
"29": "comtai1",
"30": "copbar1",
"31": "crseag1",
"32": "eaywag1",
"33": "eucdov",
"34": "eurcoo",
"35": "gloibi",
"36": "goflea1",
"37": "graher1",
"38": "grecou1",
"39": "greegr",
"40": "grejun2",
"41": "grewar3",
"42": "grnsan",
"43": "grnwar1",
"44": "grtdro1",
"45": "grynig2",
"46": "grywag",
"47": "gybpri1",
"48": "gyhcaf1",
"49": "hoopoe",
"50": "houspa",
"51": "inbrob1",
"52": "indpit1",
"53": "indrob1",
"54": "indrol2",
"55": "inpher1",
"56": "insowl1",
"57": "junbab2",
"58": "junmyn1",
"59": "junowl1",
"60": "kenplo1",
"61": "labcro1",
"62": "laudov1",
"63": "lirplo",
"64": "litegr",
"65": "litgre1",
"66": "litspi1",
"67": "litswi1",
"68": "marsan",
"69": "pabflo1",
"70": "piebus1",
"71": "piekin1",
"72": "placuc3",
"73": "plapri1",
"74": "plhpar1",
"75": "purher1",
"76": "pursun4",
"77": "putbab1",
"78": "rerswa1",
"79": "revbul",
"80": "rewlap1",
"81": "rocpig",
"82": "rorpar",
"83": "rossta2",
"84": "ruftre2",
"85": "spepic1",
"86": "spodov",
"87": "stbkin1",
"88": "thbwar1",
"89": "tibfly3",
"90": "vefnut1",
"91": "vehpar1",
"92": "wemhar1",
"93": "whbwat1",
"94": "whiter2",
"95": "whtkin2",
"96": "woosan",
"97": "zitcis1"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"asbfly": 0,
"ashdro1": 1,
"ashpri1": 2,
"asikoe2": 3,
"aspfly1": 4,
"aspswi1": 5,
"barswa": 6,
"bcnher": 7,
"bkcbul1": 8,
"bkrfla1": 9,
"bkskit1": 10,
"bkwsti": 11,
"blakit1": 12,
"blhori1": 13,
"blnmon1": 14,
"blrwar1": 15,
"brcful1": 16,
"brnhao1": 17,
"brwjac1": 18,
"brwowl1": 19,
"bwfshr1": 20,
"comfla1": 21,
"comgre": 22,
"comior1": 23,
"comkin1": 24,
"commoo3": 25,
"commyn": 26,
"comros": 27,
"comsan": 28,
"comtai1": 29,
"copbar1": 30,
"crseag1": 31,
"eaywag1": 32,
"eucdov": 33,
"eurcoo": 34,
"gloibi": 35,
"goflea1": 36,
"graher1": 37,
"grecou1": 38,
"greegr": 39,
"grejun2": 40,
"grewar3": 41,
"grnsan": 42,
"grnwar1": 43,
"grtdro1": 44,
"grynig2": 45,
"grywag": 46,
"gybpri1": 47,
"gyhcaf1": 48,
"hoopoe": 49,
"houspa": 50,
"inbrob1": 51,
"indpit1": 52,
"indrob1": 53,
"indrol2": 54,
"inpher1": 55,
"insowl1": 56,
"junbab2": 57,
"junmyn1": 58,
"junowl1": 59,
"kenplo1": 60,
"labcro1": 61,
"laudov1": 62,
"lirplo": 63,
"litegr": 64,
"litgre1": 65,
"litspi1": 66,
"litswi1": 67,
"marsan": 68,
"pabflo1": 69,
"piebus1": 70,
"piekin1": 71,
"placuc3": 72,
"plapri1": 73,
"plhpar1": 74,
"purher1": 75,
"pursun4": 76,
"putbab1": 77,
"rerswa1": 78,
"revbul": 79,
"rewlap1": 80,
"rocpig": 81,
"rorpar": 82,
"rossta2": 83,
"ruftre2": 84,
"spepic1": 85,
"spodov": 86,
"stbkin1": 87,
"thbwar1": 88,
"tibfly3": 89,
"vefnut1": 90,
"vehpar1": 91,
"wemhar1": 92,
"whbwat1": 93,
"whiter2": 94,
"whtkin2": 95,
"woosan": 96,
"zitcis1": 97
},
"layer_norm_eps": 1e-05,
"layerdrop": 0.0,
"mask_channel_length": 10,
"mask_channel_min_space": 1,
"mask_channel_other": 0.0,
"mask_channel_prob": 0.0,
"mask_channel_selection": "static",
"mask_feature_length": 10,
"mask_feature_min_masks": 0,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_min_masks": 2,
"mask_time_min_space": 1,
"mask_time_other": 0.0,
"mask_time_prob": 0.05,
"mask_time_selection": "static",
"model_type": "wav2vec2",
"no_mask_channel_overlap": false,
"no_mask_time_overlap": false,
"num_adapter_layers": 3,
"num_attention_heads": 12,
"num_codevector_groups": 2,
"num_codevectors_per_group": 320,
"num_conv_pos_embedding_groups": 16,
"num_conv_pos_embeddings": 128,
"num_feat_extract_layers": 7,
"num_hidden_layers": 12,
"num_negatives": 100,
"output_hidden_size": 768,
"pad_token_id": 0,
"proj_codevector_dim": 256,
"tdnn_dilation": [
1,
2,
3,
1,
1
],
"tdnn_dim": [
512,
512,
512,
512,
1500
],
"tdnn_kernel": [
5,
3,
3,
1,
1
],
"torch_dtype": "float32",
"transformers_version": "4.40.0",
"use_weighted_layer_sum": false,
"vocab_size": 32,
"xvector_output_dim": 512
}