File size: 1,796 Bytes
bbbc9ba
 
 
 
 
 
 
 
 
 
 
 
 
51169e9
 
bbbc9ba
 
 
 
51169e9
bbbc9ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# ##########################################################################################
# Model: Whisper-large-v3 Encoder + Attention pooling for Arabic Dialect Identification
#
# Author: Haroun Elleuch
############################################################################################


# Location of the fine-tuned checkpoints. Every entry under
# `pretrainer.paths` below is built by appending a file name to this value.
pretrained_path: Elyadata/ADI-whisper-ADI20
# Value passed as `source` to the Whisper wrapper.
# presumably a Hugging Face Hub model identifier -- confirm
whisper_hub: openai/whisper-large-v3

# Number of output units of the final linear layer (`output_mlp.n_neurons`).
n_languages: 20
# Feature size shared by `attention_pooling.input_dim` and
# `output_mlp.input_size`.
# presumably the hidden size of the Whisper encoder output -- confirm
features_dim: 1280

# Whisper wrapper built from the <whisper_hub> source.
# For newer versions of SpeechBrain (dev branch), replace with:
# whisper: !new:speechbrain.integrations.huggingface.whisper.Whisper
whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
    source: !ref <whisper_hub>
    # Only the encoder part is requested, in line with the file header
    # ("Whisper-large-v3 Encoder").
    encoder_only: True
    # NOTE(review): the encoder is left unfrozen -- confirm this is intended
    # when this file is used for inference only.
    freeze_encoder: False
    # presumably the local directory where the source model is stored -- confirm
    save_path: whisper_hf

# Pooling module operating on <features_dim>-dimensional inputs.
# presumably reduces the encoder's frame sequence to one vector per
# utterance -- TODO confirm against the AttentionPooling implementation
attention_pooling: !new:speechbrain.nnet.pooling.AttentionPooling
    input_dim: !ref <features_dim>

# Final linear layer: <features_dim> inputs -> <n_languages> outputs,
# configured without a bias term.
output_mlp: !new:speechbrain.nnet.linear.Linear
    input_size: !ref <features_dim>
    n_neurons: !ref <n_languages>
    bias: False


# Named collection of the three modules defined above (all by reference,
# so no new objects are created here).
# presumably read by the SpeechBrain interface that loads this file -- confirm
modules:
    whisper: !ref <whisper>
    attention_pooling: !ref <attention_pooling>
    output_mlp: !ref <output_mlp>

# Softmax activation with `apply_log` enabled.
# presumably yields log-probabilities over the output classes -- confirm
log_softmax: !new:speechbrain.nnet.activations.Softmax
    apply_log: True

# Created with no arguments; its contents are supplied by the pretrainer,
# which loads it from <pretrained_path>/dialect_encoder.txt.
# presumably maps class indices to dialect labels -- confirm
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Loads saved state into the objects defined above.
# `loadables` names the target objects; `paths` gives, under the same keys,
# the file each one is loaded from. All files live under <pretrained_path>.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        whisper: !ref <whisper>
        attention_pooling: !ref <attention_pooling>
        output_mlp: !ref <output_mlp>
        label_encoder: !ref <label_encoder>
    paths:
        whisper: !ref <pretrained_path>/whisper.ckpt
        attention_pooling: !ref <pretrained_path>/attention_pooling.ckpt
        output_mlp: !ref <pretrained_path>/output_mlp.ckpt
        # The key is `label_encoder`, but the stored file is named
        # dialect_encoder.txt.
        label_encoder: !ref <pretrained_path>/dialect_encoder.txt