Iliass Lasri committed on
Commit
e32d9e2
·
1 Parent(s): dacf183

add dinoSR and SpidR

Browse files
DinoSR_original/config.yaml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ training:
2
+ run_name: dinosr_base_original
3
+ epochs: 150
4
+ learning_rate: 0.0001
5
+ log_interval: 100
6
+ checkpoint_dir: null
7
+ resume_from: /home/infres/abrik-22/snlp_project/outputs/dinosr_base_original/2026-03-23/09-35-33/round_0/E1_last.pt
8
+ n_iterative_pseudolabeling: 3
9
+ lr_scheduler:
10
+ _target_: torch.optim.lr_scheduler.CosineAnnealingLR
11
+ T_max: ${training.epochs}
12
+ eta_min: 1.0e-06
13
+ lr_scheduler_start_epoch: 150
14
+ dataset:
15
+ root: data/LibriSpeech
16
+ train_split: train-clean-100
17
+ test_split: test-clean
18
+ batch_size: 32
19
+ num_workers: 1
20
+ noise_dir: noise_fullband
21
+ max_audio_length: 160000
22
+ augmentations:
23
+ max_augs: 4
24
+ time_stretch: true
25
+ pitch_shift: true
26
+ reverberation: true
27
+ noise: true
28
+ rir_dir: data/rirs
29
+ activate_extra_augs: true
30
+ echo:
31
+ enabled: true
32
+ volume_range:
33
+ - 0.1
34
+ - 0.5
35
+ duration_range:
36
+ - 0.1
37
+ - 0.5
38
+ random_noise:
39
+ enabled: true
40
+ noise_std: 0.001
41
+ pink_noise:
42
+ enabled: true
43
+ noise_std: 0.01
44
+ lowpass_filter:
45
+ enabled: true
46
+ cutoff_freq: 5000
47
+ highpass_filter:
48
+ enabled: true
49
+ cutoff_freq: 500
50
+ bandpass_filter:
51
+ enabled: true
52
+ cutoff_freq_low: 300
53
+ cutoff_freq_high: 8000
54
+ smooth:
55
+ enabled: true
56
+ window_size_range:
57
+ - 2
58
+ - 10
59
+ boost_audio:
60
+ enabled: true
61
+ amount: 20
62
+ duck_audio:
63
+ enabled: true
64
+ amount: 20
65
+ updownresample:
66
+ enabled: true
67
+ intermediate_freq: 32000
68
+ model:
69
+ name: dinosr_base_original
70
+ layer: 5
71
+ vocab_size: 256
72
+ kind_kmeans: kmeans
73
+ quantizer:
74
+ hidden_dim: 256
DinoSR_original/round_0/E1_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ebba1a60f7464cc06a2b38222f07471b2856ee58aafbdbbbf2906ec92b690c9
3
+ size 3961480
DinoSR_original/round_0/E1_last.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df12f6bd780bdae783bdf453a3e64db055aa36f833332e5ec995c4549d49a501
3
+ size 3961480
DinoSR_original/round_0/tensorboard/events.out.tfevents.1774341402.node20.1724484.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42b81295047bef9b42ae3f165cdf91c3cedc09223d3c8259e2c8ff55d9a54bfd
3
+ size 8152
DinoSR_reproduced/config.yaml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ training:
2
+ run_name: dinosr_base_reproduced
3
+ epochs: 150
4
+ learning_rate: 0.0001
5
+ log_interval: 100
6
+ checkpoint_dir: null
7
+ resume_from: /home/infres/abrik-22/snlp_project/outputs/dinosr_base_reproduced/2026-03-23/09-34-30/round_0/E1_last.pt
8
+ n_iterative_pseudolabeling: 3
9
+ lr_scheduler:
10
+ _target_: torch.optim.lr_scheduler.CosineAnnealingLR
11
+ T_max: ${training.epochs}
12
+ eta_min: 1.0e-06
13
+ lr_scheduler_start_epoch: 150
14
+ dataset:
15
+ root: data/LibriSpeech
16
+ train_split: train-clean-100
17
+ test_split: test-clean
18
+ batch_size: 32
19
+ num_workers: 1
20
+ noise_dir: noise_fullband
21
+ max_audio_length: 160000
22
+ augmentations:
23
+ max_augs: 4
24
+ time_stretch: true
25
+ pitch_shift: true
26
+ reverberation: true
27
+ noise: true
28
+ rir_dir: data/rirs
29
+ activate_extra_augs: true
30
+ echo:
31
+ enabled: true
32
+ volume_range:
33
+ - 0.1
34
+ - 0.5
35
+ duration_range:
36
+ - 0.1
37
+ - 0.5
38
+ random_noise:
39
+ enabled: true
40
+ noise_std: 0.001
41
+ pink_noise:
42
+ enabled: true
43
+ noise_std: 0.01
44
+ lowpass_filter:
45
+ enabled: true
46
+ cutoff_freq: 5000
47
+ highpass_filter:
48
+ enabled: true
49
+ cutoff_freq: 500
50
+ bandpass_filter:
51
+ enabled: true
52
+ cutoff_freq_low: 300
53
+ cutoff_freq_high: 8000
54
+ smooth:
55
+ enabled: true
56
+ window_size_range:
57
+ - 2
58
+ - 10
59
+ boost_audio:
60
+ enabled: true
61
+ amount: 20
62
+ duck_audio:
63
+ enabled: true
64
+ amount: 20
65
+ updownresample:
66
+ enabled: true
67
+ intermediate_freq: 32000
68
+ model:
69
+ name: dinosr_base_reproduced
70
+ layer: 5
71
+ vocab_size: 256
72
+ kind_kmeans: kmeans
73
+ quantizer:
74
+ hidden_dim: 256
DinoSR_reproduced/round_0/E1_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2150f0bdc702335f6793473834b6603b57af0ef4c212a2dcbf3c992304915334
3
+ size 3961544
DinoSR_reproduced/round_0/E1_last.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2314b20a8f777ad78c5ecd3043e1da9252c286b3d63039f100a3ce4d3d7aa95a
3
+ size 3961544
DinoSR_reproduced/round_0/tensorboard/events.out.tfevents.1774341408.node21.246280.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb9d9ed16084d176fd4fcecafb3608b5f4970f19faacefff99067021db6528bc
3
+ size 7712
SpidR/256/config.yaml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ training:
2
+ run_name: spidr
3
+ epochs: 150
4
+ learning_rate: 0.0001
5
+ log_interval: 100
6
+ checkpoint_dir: null
7
+ resume_from: /home/infres/abrik-22/snlp_project/outputs/256/2026-03-22/18-41-33/round_0/E1_last.pt
8
+ n_iterative_pseudolabeling: 3
9
+ lr_scheduler:
10
+ _target_: torch.optim.lr_scheduler.CosineAnnealingLR
11
+ T_max: ${training.epochs}
12
+ eta_min: 1.0e-06
13
+ lr_scheduler_start_epoch: 150
14
+ dataset:
15
+ root: data/LibriSpeech
16
+ train_split: train-clean-100
17
+ test_split: test-clean
18
+ batch_size: 32
19
+ num_workers: 1
20
+ noise_dir: noise_fullband
21
+ max_audio_length: 160000
22
+ augmentations:
23
+ max_augs: 4
24
+ time_stretch: true
25
+ pitch_shift: true
26
+ reverberation: true
27
+ noise: true
28
+ rir_dir: data/rirs
29
+ activate_extra_augs: true
30
+ echo:
31
+ enabled: true
32
+ volume_range:
33
+ - 0.1
34
+ - 0.5
35
+ duration_range:
36
+ - 0.1
37
+ - 0.5
38
+ random_noise:
39
+ enabled: true
40
+ noise_std: 0.001
41
+ pink_noise:
42
+ enabled: true
43
+ noise_std: 0.01
44
+ lowpass_filter:
45
+ enabled: true
46
+ cutoff_freq: 5000
47
+ highpass_filter:
48
+ enabled: true
49
+ cutoff_freq: 500
50
+ bandpass_filter:
51
+ enabled: true
52
+ cutoff_freq_low: 300
53
+ cutoff_freq_high: 8000
54
+ smooth:
55
+ enabled: true
56
+ window_size_range:
57
+ - 2
58
+ - 10
59
+ boost_audio:
60
+ enabled: true
61
+ amount: 20
62
+ duck_audio:
63
+ enabled: true
64
+ amount: 20
65
+ updownresample:
66
+ enabled: true
67
+ intermediate_freq: 32000
68
+ model:
69
+ name: spidr_base
70
+ layer: 6
71
+ vocab_size: 256
72
+ kind_kmeans: kmeans
73
+ quantizer:
74
+ hidden_dim: 256
SpidR/256/round_0/E1_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:060e9fad2b4a5f9262485c582518b28145c51161e5bf54798492aac004813649
3
+ size 3961544
SpidR/256/round_0/E1_last.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:884c648614fd0ba0826a5d7a84c8a20c10ca3a5428a80ffdd5a8072ce7875cfd
3
+ size 3961544
SpidR/256/round_0/tensorboard/events.out.tfevents.1774296494.node16.189390.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b5d59c76d66df40fad02b56b65224fef5b3975147de09c5568b59768cc6595
3
+ size 15116
SpidR/256_no_extra_augs/config.yaml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ training:
2
+ run_name: spidr_base
3
+ epochs: 150
4
+ learning_rate: 0.0001
5
+ log_interval: 100
6
+ checkpoint_dir: null
7
+ resume_from: /home/infres/abrik-22/snlp_project/outputs/spidr_base/2026-03-23/22-15-04/round_0/E1_last.pt
8
+ n_iterative_pseudolabeling: 3
9
+ lr_scheduler:
10
+ _target_: torch.optim.lr_scheduler.CosineAnnealingLR
11
+ T_max: ${training.epochs}
12
+ eta_min: 1.0e-06
13
+ lr_scheduler_start_epoch: 150
14
+ dataset:
15
+ root: data/LibriSpeech
16
+ train_split: train-clean-100
17
+ test_split: test-clean
18
+ batch_size: 32
19
+ num_workers: 1
20
+ noise_dir: noise_fullband
21
+ max_audio_length: 160000
22
+ augmentations:
23
+ max_augs: 1
24
+ time_stretch: true
25
+ pitch_shift: true
26
+ reverberation: true
27
+ noise: true
28
+ rir_dir: data/rirs
29
+ activate_extra_augs: false
30
+ echo:
31
+ enabled: true
32
+ volume_range:
33
+ - 0.1
34
+ - 0.5
35
+ duration_range:
36
+ - 0.1
37
+ - 0.5
38
+ random_noise:
39
+ enabled: true
40
+ noise_std: 0.001
41
+ pink_noise:
42
+ enabled: true
43
+ noise_std: 0.01
44
+ lowpass_filter:
45
+ enabled: true
46
+ cutoff_freq: 5000
47
+ highpass_filter:
48
+ enabled: true
49
+ cutoff_freq: 500
50
+ bandpass_filter:
51
+ enabled: true
52
+ cutoff_freq_low: 300
53
+ cutoff_freq_high: 8000
54
+ smooth:
55
+ enabled: true
56
+ window_size_range:
57
+ - 2
58
+ - 10
59
+ boost_audio:
60
+ enabled: true
61
+ amount: 20
62
+ duck_audio:
63
+ enabled: true
64
+ amount: 20
65
+ updownresample:
66
+ enabled: true
67
+ intermediate_freq: 32000
68
+ model:
69
+ name: spidr_base
70
+ layer: 6
71
+ vocab_size: 256
72
+ kind_kmeans: kmeans
73
+ quantizer:
74
+ hidden_dim: 256
SpidR/256_no_extra_augs/round_0/E1_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a2a2d11d84fd6eeaf38325341a90b02e18c30506fc527655c1667f2f50cc058
3
+ size 3961608
SpidR/256_no_extra_augs/round_0/E1_last.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cae54872e8b5f7a7efa3d120fdc456b6ce375e4f024bd226cde2e4e51db0007e
3
+ size 3961608
SpidR/256_no_extra_augs/round_0/tensorboard/events.out.tfevents.1774345454.node47.1425577.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b24e73db74ad8005cca1f249be0c046d577920fe06b0db057cd2617e7c95c04
3
+ size 9060