Jakob Poncelet committed on
Commit ·
b4d3e53
1
Parent(s): e7e1c5c
First model version
Browse files
- checkpoint_best.pt +3 -0
- dict.ltr.txt +89 -0
- finetuning_config.yaml +65 -0
- pretraining_config.yaml +63 -0
checkpoint_best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13ee473f13953e0d9053f39367251669e2a006af3d99f5b007dfe2a79b35f851
|
| 3 |
+
size 1140805865
|
dict.ltr.txt
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
| 1425817
|
| 2 |
+
e 1178319
|
| 3 |
+
n 633984
|
| 4 |
+
a 534584
|
| 5 |
+
i 432845
|
| 6 |
+
t 426822
|
| 7 |
+
d 368464
|
| 8 |
+
r 363974
|
| 9 |
+
o 362103
|
| 10 |
+
s 243285
|
| 11 |
+
l 240045
|
| 12 |
+
h 194637
|
| 13 |
+
g 193570
|
| 14 |
+
k 170494
|
| 15 |
+
m 163029
|
| 16 |
+
u 157460
|
| 17 |
+
v 141792
|
| 18 |
+
j 130640
|
| 19 |
+
w 111117
|
| 20 |
+
z 97895
|
| 21 |
+
b 97538
|
| 22 |
+
p 82921
|
| 23 |
+
c 75002
|
| 24 |
+
f 51103
|
| 25 |
+
' 20301
|
| 26 |
+
X 7489
|
| 27 |
+
é 7286
|
| 28 |
+
y 5108
|
| 29 |
+
è 5063
|
| 30 |
+
- 4778
|
| 31 |
+
ë 3361
|
| 32 |
+
x 2022
|
| 33 |
+
q 682
|
| 34 |
+
ï 493
|
| 35 |
+
ü 264
|
| 36 |
+
ö 157
|
| 37 |
+
à 139
|
| 38 |
+
ê 80
|
| 39 |
+
ç 39
|
| 40 |
+
1 38
|
| 41 |
+
á 38
|
| 42 |
+
ä 35
|
| 43 |
+
2 34
|
| 44 |
+
32 30
|
| 45 |
+
áx 16
|
| 46 |
+
38 15
|
| 47 |
+
16 13
|
| 48 |
+
35 12
|
| 49 |
+
4 11
|
| 50 |
+
ô 10
|
| 51 |
+
â 10
|
| 52 |
+
21 10
|
| 53 |
+
& 10
|
| 54 |
+
ñ 8
|
| 55 |
+
314 6
|
| 56 |
+
î 5
|
| 57 |
+
12 5
|
| 58 |
+
24 5
|
| 59 |
+
19 5
|
| 60 |
+
17 5
|
| 61 |
+
40 4
|
| 62 |
+
10 4
|
| 63 |
+
130 4
|
| 64 |
+
52 4
|
| 65 |
+
5 3
|
| 66 |
+
8 3
|
| 67 |
+
20 3
|
| 68 |
+
3 3
|
| 69 |
+
Ö 3
|
| 70 |
+
í 3
|
| 71 |
+
6 2
|
| 72 |
+
04 2
|
| 73 |
+
313 2
|
| 74 |
+
ó 2
|
| 75 |
+
201 1
|
| 76 |
+
67 1
|
| 77 |
+
499 1
|
| 78 |
+
7 1
|
| 79 |
+
45 1
|
| 80 |
+
198 1
|
| 81 |
+
25 1
|
| 82 |
+
902 1
|
| 83 |
+
xq 1
|
| 84 |
+
3xx 1
|
| 85 |
+
66 1
|
| 86 |
+
ù 1
|
| 87 |
+
ò 1
|
| 88 |
+
Å 1
|
| 89 |
+
ú 1
|
finetuning_config.yaml
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# @package _group_
|
| 2 |
+
|
| 3 |
+
common:
|
| 4 |
+
memory_efficient_fp16: true
|
| 5 |
+
log_format: json
|
| 6 |
+
log_interval: 100
|
| 7 |
+
model_parallel_size: 1
|
| 8 |
+
|
| 9 |
+
checkpoint:
|
| 10 |
+
no_epoch_checkpoints: true
|
| 11 |
+
best_checkpoint_metric: wer
|
| 12 |
+
save_dir: /esat/spchtemp/scratch/jponcele/selfsupervised_exps/result/finetune_VW_base_all
|
| 13 |
+
|
| 14 |
+
task:
|
| 15 |
+
_name: audio_pretraining
|
| 16 |
+
data: /users/spraak/jponcele/BenchmarkingSS/data/cgn_phone_10ms_w2v2_all
|
| 17 |
+
normalize: true #false
|
| 18 |
+
labels: ltr
|
| 19 |
+
segments: true
|
| 20 |
+
max_length: 800000
|
| 21 |
+
|
| 22 |
+
dataset:
|
| 23 |
+
num_workers: 6
|
| 24 |
+
batch_size: 4
|
| 25 |
+
max_tokens: 32000000
|
| 26 |
+
skip_invalid_size_inputs_valid_test: true
|
| 27 |
+
valid_subset: test
|
| 28 |
+
data_buffer_size: 2
|
| 29 |
+
|
| 30 |
+
distributed_training:
|
| 31 |
+
ddp_backend: legacy_ddp
|
| 32 |
+
distributed_world_size: 1
|
| 33 |
+
|
| 34 |
+
criterion:
|
| 35 |
+
_name: ctc
|
| 36 |
+
zero_infinity: true
|
| 37 |
+
|
| 38 |
+
optimization:
|
| 39 |
+
max_update: 500000
|
| 40 |
+
lr: [0.00003]
|
| 41 |
+
sentence_avg: true
|
| 42 |
+
update_freq: [4]
|
| 43 |
+
|
| 44 |
+
optimizer:
|
| 45 |
+
_name: adam
|
| 46 |
+
adam_betas: (0.9,0.98)
|
| 47 |
+
adam_eps: 1e-08
|
| 48 |
+
|
| 49 |
+
lr_scheduler:
|
| 50 |
+
_name: tri_stage
|
| 51 |
+
phase_ratio: [0.1, 0.4, 0.5]
|
| 52 |
+
final_lr_scale: 0.05
|
| 53 |
+
|
| 54 |
+
model:
|
| 55 |
+
_name: wav2vec_ctc
|
| 56 |
+
w2v_path: /esat/spchtemp/scratch/jponcele/selfsupervised_exps/result/pretrain_w2v2_cgn-unsup-VW_base/checkpoint_74_250000.pt
|
| 57 |
+
apply_mask: true
|
| 58 |
+
mask_prob: 0.65
|
| 59 |
+
mask_channel_prob: 0.5
|
| 60 |
+
mask_channel_length: 64
|
| 61 |
+
layerdrop: 0.1
|
| 62 |
+
activation_dropout: 0.1
|
| 63 |
+
feature_grad_mult: 0.0
|
| 64 |
+
freeze_finetune_updates: 0
|
| 65 |
+
|
pretraining_config.yaml
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# @package _group_
|
| 2 |
+
|
| 3 |
+
common:
|
| 4 |
+
memory_efficient_fp16: true
|
| 5 |
+
log_format: json
|
| 6 |
+
log_interval: 100
|
| 7 |
+
|
| 8 |
+
checkpoint:
|
| 9 |
+
save_interval_updates: 10000
|
| 10 |
+
keep_interval_updates: 1
|
| 11 |
+
no_epoch_checkpoints: true
|
| 12 |
+
save_dir: /esat/spchtemp/scratch/jponcele/selfsupervised_exps/result/pretrain_w2v2_cgn-unsup-VW_base
|
| 13 |
+
|
| 14 |
+
task:
|
| 15 |
+
_name: audio_pretraining
|
| 16 |
+
data: /users/spraak/jponcele/BenchmarkingSS/data/cgn_unsup_VW_w2v2
|
| 17 |
+
max_sample_size: 250000
|
| 18 |
+
min_sample_size: 4000
|
| 19 |
+
segments: true
|
| 20 |
+
normalize: true
|
| 21 |
+
|
| 22 |
+
dataset:
|
| 23 |
+
num_workers: 6
|
| 24 |
+
#batch_size: 4
|
| 25 |
+
max_tokens: 1400000
|
| 26 |
+
skip_invalid_size_inputs_valid_test: true
|
| 27 |
+
valid_subset: test
|
| 28 |
+
data_buffer_size: 1 #2
|
| 29 |
+
required_batch_size_multiple: 1 #default=8
|
| 30 |
+
|
| 31 |
+
distributed_training:
|
| 32 |
+
distributed_world_size: 1
|
| 33 |
+
ddp_backend: legacy_ddp
|
| 34 |
+
|
| 35 |
+
criterion:
|
| 36 |
+
_name: wav2vec
|
| 37 |
+
infonce: true
|
| 38 |
+
log_keys: ["prob_perplexity","code_perplexity","temp"]
|
| 39 |
+
loss_weights: [0.1, 10]
|
| 40 |
+
|
| 41 |
+
optimization:
|
| 42 |
+
max_update: 400000
|
| 43 |
+
lr: [0.0005]
|
| 44 |
+
update_freq: [32]
|
| 45 |
+
|
| 46 |
+
optimizer:
|
| 47 |
+
_name: adam
|
| 48 |
+
adam_betas: (0.9,0.98)
|
| 49 |
+
adam_eps: 1e-06
|
| 50 |
+
weight_decay: 0.01
|
| 51 |
+
|
| 52 |
+
lr_scheduler:
|
| 53 |
+
_name: polynomial_decay
|
| 54 |
+
warmup_updates: 50000
|
| 55 |
+
|
| 56 |
+
model:
|
| 57 |
+
_name: wav2vec2
|
| 58 |
+
quantize_targets: true
|
| 59 |
+
final_dim: 256
|
| 60 |
+
encoder_layerdrop: 0.05
|
| 61 |
+
dropout_input: 0.1
|
| 62 |
+
dropout_features: 0.1
|
| 63 |
+
feature_grad_mult: 0.1
|