heboya8 committed on
Commit
a8e9c3f
·
verified ·
1 Parent(s): 7a1a722

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +47 -0
  2. latent_diffusion/my_configs/audioldm_original/audioldm_original.yaml +149 -0
  3. latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=119999.ckpt +3 -0
  4. latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=79999.ckpt +3 -0
  5. latent_diffusion/my_configs/audioldm_original/infer_07-23-08:28_cfg_scale_3.5_ddim_200_n_cand_3/-0xzrMun0Rs.wav +3 -0
  6. testset_data/audiocaps/05OJDYeHLMc.wav +3 -0
  7. testset_data/audiocaps/0Wdh45yt7tY.wav +3 -0
  8. testset_data/audiocaps/2RMOegT2Jn8.wav +3 -0
  9. testset_data/audiocaps/2gvyOxKuQPY.wav +3 -0
  10. testset_data/audiocaps/3IYd8cCmUkQ.wav +3 -0
  11. testset_data/audiocaps/3Xoz87_SUdw.wav +3 -0
  12. testset_data/audiocaps/4mtfOkzOvBI.wav +3 -0
  13. testset_data/audiocaps/6uIOGE36tWo.wav +3 -0
  14. testset_data/audiocaps/7_Sr2zv1sQc.wav +3 -0
  15. testset_data/audiocaps/8Bu8CkR1xZY.wav +3 -0
  16. testset_data/audiocaps/A3vsvL-Yx0Q.wav +3 -0
  17. testset_data/audiocaps/AHrUfa2H_5s.wav +3 -0
  18. testset_data/audiocaps/B90BOtSOD2Q.wav +3 -0
  19. testset_data/audiocaps/Bl-lCgr5hGY.wav +3 -0
  20. testset_data/audiocaps/ByOqw8M2U-Q.wav +3 -0
  21. testset_data/audiocaps/Cy3HWnwMLyI.wav +3 -0
  22. testset_data/audiocaps/D3f5VIJYR7M.wav +3 -0
  23. testset_data/audiocaps/DW3z-ByrfWY.wav +3 -0
  24. testset_data/audiocaps/EOaQnfDjVyo.wav +3 -0
  25. testset_data/audiocaps/Fa1c4qfBqzE.wav +3 -0
  26. testset_data/audiocaps/HpkPTa1fQDE.wav +3 -0
  27. testset_data/audiocaps/KpwdlYIdtfs.wav +3 -0
  28. testset_data/audiocaps/LAHWV6fZwUk.wav +3 -0
  29. testset_data/audiocaps/NSyqj1DXZKg.wav +3 -0
  30. testset_data/audiocaps/NzVg-cFQJbE.wav +3 -0
  31. testset_data/audiocaps/TkclVqlyKx4.wav +3 -0
  32. testset_data/audiocaps/VkXLtUx-RmI.wav +3 -0
  33. testset_data/audiocaps/XXBVsNt2Qr8.wav +3 -0
  34. testset_data/audiocaps/YwUa3OS92ZQ.wav +3 -0
  35. testset_data/audiocaps/_gWEpDgPAho.wav +3 -0
  36. testset_data/audiocaps/_u2cNlW5DxQ.wav +3 -0
  37. testset_data/audiocaps/bZuXMxR2S4U.wav +3 -0
  38. testset_data/audiocaps/bl-eQ8XD5CY.wav +3 -0
  39. testset_data/audiocaps/dPCj4WhTZ3c.wav +3 -0
  40. testset_data/audiocaps/dTVkk4GSmNA.wav +3 -0
  41. testset_data/audiocaps/fJ6ZeWYfLjA.wav +3 -0
  42. testset_data/audiocaps/gW33LYEvoaw.wav +3 -0
  43. testset_data/audiocaps/hM88FG1_D5Q.wav +3 -0
  44. testset_data/audiocaps/jqiD3VeM_hY.wav +3 -0
  45. testset_data/audiocaps/mFcHGbnNtSQ.wav +3 -0
  46. testset_data/audiocaps/oxZjkE984Uo.wav +3 -0
  47. testset_data/audiocaps/s72505MIhz8.wav +3 -0
  48. testset_data/audiocaps/xBDcJKb-9vk.wav +3 -0
  49. testset_data/audiocaps/xmaVWayBvaY.wav +3 -0
  50. testset_data/audiocaps/y6iMm7Pltq0.wav +3 -0
.gitattributes CHANGED
@@ -33,3 +33,50 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ latent_diffusion/my_configs/audioldm_original/infer_07-23-08:28_cfg_scale_3.5_ddim_200_n_cand_3/-0xzrMun0Rs.wav filter=lfs diff=lfs merge=lfs -text
37
+ testset_data/audiocaps/8Bu8CkR1xZY.wav filter=lfs diff=lfs merge=lfs -text
38
+ testset_data/audiocaps/AHrUfa2H_5s.wav filter=lfs diff=lfs merge=lfs -text
39
+ testset_data/audiocaps/XXBVsNt2Qr8.wav filter=lfs diff=lfs merge=lfs -text
40
+ testset_data/audiocaps/3IYd8cCmUkQ.wav filter=lfs diff=lfs merge=lfs -text
41
+ testset_data/audiocaps/hM88FG1_D5Q.wav filter=lfs diff=lfs merge=lfs -text
42
+ testset_data/audiocaps/bl-eQ8XD5CY.wav filter=lfs diff=lfs merge=lfs -text
43
+ testset_data/audiocaps/B90BOtSOD2Q.wav filter=lfs diff=lfs merge=lfs -text
44
+ testset_data/audiocaps/6uIOGE36tWo.wav filter=lfs diff=lfs merge=lfs -text
45
+ testset_data/audiocaps/HpkPTa1fQDE.wav filter=lfs diff=lfs merge=lfs -text
46
+ testset_data/audiocaps/s72505MIhz8.wav filter=lfs diff=lfs merge=lfs -text
47
+ testset_data/audiocaps/2gvyOxKuQPY.wav filter=lfs diff=lfs merge=lfs -text
48
+ testset_data/audiocaps/bZuXMxR2S4U.wav filter=lfs diff=lfs merge=lfs -text
49
+ testset_data/audiocaps/Cy3HWnwMLyI.wav filter=lfs diff=lfs merge=lfs -text
50
+ testset_data/audiocaps/_gWEpDgPAho.wav filter=lfs diff=lfs merge=lfs -text
51
+ testset_data/audiocaps/_u2cNlW5DxQ.wav filter=lfs diff=lfs merge=lfs -text
52
+ testset_data/audiocaps/KpwdlYIdtfs.wav filter=lfs diff=lfs merge=lfs -text
53
+ testset_data/audiocaps/EOaQnfDjVyo.wav filter=lfs diff=lfs merge=lfs -text
54
+ testset_data/audiocaps/dTVkk4GSmNA.wav filter=lfs diff=lfs merge=lfs -text
55
+ testset_data/audiocaps/ByOqw8M2U-Q.wav filter=lfs diff=lfs merge=lfs -text
56
+ testset_data/audiocaps/xmaVWayBvaY.wav filter=lfs diff=lfs merge=lfs -text
57
+ testset_data/audiocaps/LAHWV6fZwUk.wav filter=lfs diff=lfs merge=lfs -text
58
+ testset_data/audiocaps/DW3z-ByrfWY.wav filter=lfs diff=lfs merge=lfs -text
59
+ testset_data/audiocaps/gW33LYEvoaw.wav filter=lfs diff=lfs merge=lfs -text
60
+ testset_data/audiocaps/7_Sr2zv1sQc.wav filter=lfs diff=lfs merge=lfs -text
61
+ testset_data/audiocaps/fJ6ZeWYfLjA.wav filter=lfs diff=lfs merge=lfs -text
62
+ testset_data/audiocaps/TkclVqlyKx4.wav filter=lfs diff=lfs merge=lfs -text
63
+ testset_data/audiocaps/Fa1c4qfBqzE.wav filter=lfs diff=lfs merge=lfs -text
64
+ testset_data/audiocaps/dPCj4WhTZ3c.wav filter=lfs diff=lfs merge=lfs -text
65
+ testset_data/audiocaps/D3f5VIJYR7M.wav filter=lfs diff=lfs merge=lfs -text
66
+ testset_data/audiocaps/3Xoz87_SUdw.wav filter=lfs diff=lfs merge=lfs -text
67
+ testset_data/audiocaps/oxZjkE984Uo.wav filter=lfs diff=lfs merge=lfs -text
68
+ testset_data/audiocaps/0Wdh45yt7tY.wav filter=lfs diff=lfs merge=lfs -text
69
+ testset_data/audiocaps/A3vsvL-Yx0Q.wav filter=lfs diff=lfs merge=lfs -text
70
+ testset_data/audiocaps/2RMOegT2Jn8.wav filter=lfs diff=lfs merge=lfs -text
71
+ testset_data/audiocaps/jqiD3VeM_hY.wav filter=lfs diff=lfs merge=lfs -text
72
+ testset_data/audiocaps/NSyqj1DXZKg.wav filter=lfs diff=lfs merge=lfs -text
73
+ testset_data/audiocaps/NzVg-cFQJbE.wav filter=lfs diff=lfs merge=lfs -text
74
+ testset_data/audiocaps/YwUa3OS92ZQ.wav filter=lfs diff=lfs merge=lfs -text
75
+ testset_data/audiocaps/Bl-lCgr5hGY.wav filter=lfs diff=lfs merge=lfs -text
76
+ testset_data/audiocaps/yrme-KRBvzk.wav filter=lfs diff=lfs merge=lfs -text
77
+ testset_data/audiocaps/mFcHGbnNtSQ.wav filter=lfs diff=lfs merge=lfs -text
78
+ testset_data/audiocaps/y6iMm7Pltq0.wav filter=lfs diff=lfs merge=lfs -text
79
+ testset_data/audiocaps/xBDcJKb-9vk.wav filter=lfs diff=lfs merge=lfs -text
80
+ testset_data/audiocaps/4mtfOkzOvBI.wav filter=lfs diff=lfs merge=lfs -text
81
+ testset_data/audiocaps/05OJDYeHLMc.wav filter=lfs diff=lfs merge=lfs -text
82
+ testset_data/audiocaps/VkXLtUx-RmI.wav filter=lfs diff=lfs merge=lfs -text
latent_diffusion/my_configs/audioldm_original/audioldm_original.yaml ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ metadata_root: "./data/metadata/dataset_root.json"
2
+ log_directory: "./log/latent_diffusion"
3
+ project: "audioldm"
4
+ precision: "high"
5
+
6
+ variables:
7
+ sampling_rate: &sampling_rate 16000
8
+ mel_bins: &mel_bins 64
9
+ latent_embed_dim: &latent_embed_dim 8
10
+ latent_t_size: &latent_t_size 256 # TODO might need to change
11
+ latent_f_size: &latent_f_size 16
12
+ in_channels: &unet_in_channels 8
13
+ optimize_ddpm_parameter: &optimize_ddpm_parameter true
14
+ optimize_gpt: &optimize_gpt true
15
+ warmup_steps: &warmup_steps 2000
16
+
17
+ data:
18
+ train: ["audiocaps"]
19
+ val: "audiocaps"
20
+ test: "audiocaps"
21
+ class_label_indices: "audioset_eval_subset"
22
+ dataloader_add_ons: []
23
+
24
+ step:
25
+ validation_every_n_epochs: 5
26
+ save_checkpoint_every_n_steps: 40000
27
+ max_steps: 800000
28
+ save_top_k: 2
29
+
30
+ preprocessing:
31
+ audio:
32
+ sampling_rate: *sampling_rate
33
+ max_wav_value: 32768.0
34
+ duration: 10.24
35
+ stft:
36
+ filter_length: 1024
37
+ hop_length: 160
38
+ win_length: 1024
39
+ mel:
40
+ n_mel_channels: *mel_bins
41
+ mel_fmin: 0
42
+ mel_fmax: 8000
43
+ num_workers: 2
44
+
45
+ augmentation:
46
+ mixup: 0.0
47
+
48
+ addition_params:
49
+ gradient_accumulation_steps: 1
50
+
51
+ model:
52
+ target: audioldm_train.modules.latent_diffusion.ddpm.LatentDiffusion
53
+ params:
54
+ # Autoencoder
55
+ first_stage_config:
56
+ base_learning_rate: 8.0e-06
57
+ target: audioldm_train.modules.latent_encoder.autoencoder.AutoencoderKL
58
+ params:
59
+ reload_from_ckpt: "ckpts/vae_mel_16k_64bins.ckpt"
60
+ sampling_rate: *sampling_rate
61
+ batchsize: 3
62
+ monitor: val/rec_loss
63
+ image_key: fbank
64
+ subband: 1
65
+ embed_dim: *latent_embed_dim
66
+ time_shuffle: 1
67
+ lossconfig:
68
+ target: audioldm_train.losses.LPIPSWithDiscriminator
69
+ params:
70
+ disc_start: 50001
71
+ kl_weight: 1000.0
72
+ disc_weight: 0.5
73
+ disc_in_channels: 1
74
+ ddconfig:
75
+ double_z: true
76
+ mel_bins: *mel_bins # The frequency bins of mel spectrogram
77
+ z_channels: 8
78
+ resolution: 256
79
+ downsample_time: false
80
+ in_channels: 1
81
+ out_ch: 1
82
+ ch: 128
83
+ ch_mult:
84
+ - 1
85
+ - 2
86
+ - 4
87
+ num_res_blocks: 2
88
+ attn_resolutions: []
89
+ dropout: 0.0
90
+
91
+ # Other parameters
92
+ base_learning_rate: 1.0e-4
93
+ warmup_steps: *warmup_steps
94
+ optimize_ddpm_parameter: *optimize_ddpm_parameter
95
+ sampling_rate: *sampling_rate
96
+ batchsize: 1
97
+ linear_start: 0.0015
98
+ linear_end: 0.0195
99
+ num_timesteps_cond: 1
100
+ log_every_t: 200
101
+ timesteps: 1000
102
+ unconditional_prob_cfg: 0.1
103
+ parameterization: eps # [eps, x0, v]
104
+ first_stage_key: fbank
105
+ latent_t_size: *latent_t_size # TODO might need to change
106
+ latent_f_size: *latent_f_size
107
+ channels: *latent_embed_dim # TODO might need to change
108
+ monitor: val/loss_simple_ema
109
+ scale_by_std: true
110
+ unet_config:
111
+ target: audioldm_train.modules.diffusionmodules.openaimodel.UNetModel
112
+ params:
113
+ image_size: 64
114
+ extra_film_condition_dim: 512 # If you use FiLM as an extra condition, set this parameter. For example, if you have two conditioning vectors that each have dimension 512, then this number would be 1024.
115
+ # context_dim:
116
+ # - 768
117
+ in_channels: *unet_in_channels # The input channel of the UNet model
118
+ out_channels: *latent_embed_dim # TODO might need to change
119
+ model_channels: 128 # TODO might need to change
120
+ attention_resolutions:
121
+ - 8
122
+ - 4
123
+ - 2
124
+ num_res_blocks: 2
125
+ channel_mult:
126
+ - 1
127
+ - 2
128
+ - 3
129
+ - 5
130
+ num_head_channels: 32
131
+ use_spatial_transformer: true
132
+ transformer_depth: 1
133
+ extra_sa_layer: false
134
+
135
+ cond_stage_config:
136
+ film_clap_cond1:
137
+ cond_stage_key: text
138
+ conditioning_key: film
139
+ target: audioldm_train.conditional_models.CLAPAudioEmbeddingClassifierFreev2
140
+ params:
141
+ pretrained_path: ckpts/clap_htsat_tiny.pt
142
+ sampling_rate: 16000
143
+ embed_mode: text # or audio
144
+ amodel: HTSAT-tiny
145
+
146
+ evaluation_params:
147
+ unconditional_guidance_scale: 3.5
148
+ ddim_sampling_steps: 200
149
+ n_candidates_per_samples: 3
latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=119999.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81874520599c1cdc48f87af9a3d41ce228de409b9a31eae73e2c218819ee7feb
3
+ size 4910952259
latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=79999.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf8938853f65a559040b00d8164b58aa541396d807b5a6e3ba8c8ab9ef5285a9
3
+ size 4910952259
latent_diffusion/my_configs/audioldm_original/infer_07-23-08:28_cfg_scale_3.5_ddim_200_n_cand_3/-0xzrMun0Rs.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:325f313d578e4057eab5474cae4744464936581e7b71446bfbb003f96fc343eb
3
+ size 327788
testset_data/audiocaps/05OJDYeHLMc.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdb94d0d33f4b21375b3bef8d2ffd4ea063e4ad1bf08751cceacdb3ae3e7c400
3
+ size 320044
testset_data/audiocaps/0Wdh45yt7tY.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f01912fdf5fae71dde63f09749570faf29c16ff8b289e3e23f88a681890cc45
3
+ size 320044
testset_data/audiocaps/2RMOegT2Jn8.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:020e2365d24b30c3ef632e9195bbe9dd56b643e6691bd3a6b44572df3bee92b2
3
+ size 320044
testset_data/audiocaps/2gvyOxKuQPY.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef302a002971776b76428d76769766ae847f291f1ace33099b9e225f5ada710b
3
+ size 320044
testset_data/audiocaps/3IYd8cCmUkQ.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85fb31647ed1e05f2eaf7bdede7f9097ee0cabad07fec6168b6d7154e102a5a6
3
+ size 320044
testset_data/audiocaps/3Xoz87_SUdw.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee133da9f25dcb3a20a0299f732a39208be2c6f2546a5b14b5973db9f858b2f6
3
+ size 320044
testset_data/audiocaps/4mtfOkzOvBI.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b245df371a41e0949fc0ef1181efaf1438e610d76348aa174303f19eb8a53065
3
+ size 320044
testset_data/audiocaps/6uIOGE36tWo.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be77f7f2f24d4fa6b1b32e9b6bf78d3d391ec1ad9a0a3cc65e2f6c57ca497845
3
+ size 320044
testset_data/audiocaps/7_Sr2zv1sQc.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3d1b94abf9d28803a5744b60e4de72bbe462aa817261ba8c3ee7a503d06a0db
3
+ size 320044
testset_data/audiocaps/8Bu8CkR1xZY.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f10d6da1e05c7978afa1b028fc755fe1d23d3796263d3878eb3ba83aaa085d
3
+ size 320044
testset_data/audiocaps/A3vsvL-Yx0Q.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f00c60d9e5e2b9071e8f2f898adaf78f7d392acf7274bf9b6738fffead31bf27
3
+ size 320044
testset_data/audiocaps/AHrUfa2H_5s.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5da7c879adac1e1624eb3483073f493fd0c837eadd497a45561850c5dad71eb5
3
+ size 320044
testset_data/audiocaps/B90BOtSOD2Q.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54ef7017a184ee6c893bae9b484361e2c8d4acfabadfccca83ff7398fb5a8a22
3
+ size 320044
testset_data/audiocaps/Bl-lCgr5hGY.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4ad3a9721ab468d3abecd3bfb33a04806dd7e83bf3ce60ad8eb1c5b2190722e
3
+ size 320044
testset_data/audiocaps/ByOqw8M2U-Q.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9dbbacf98fac39663c9ed5fb0b58302884c0ecb1375cdbc4cb18e6b8fd8e98c
3
+ size 320044
testset_data/audiocaps/Cy3HWnwMLyI.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccde8b6db494aafb5eae8bd2a69a6f9b381073b9ff242a89a9169ea249692a6c
3
+ size 320044
testset_data/audiocaps/D3f5VIJYR7M.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0bf1d4c2bd7315270f311e409eb8e32e37bf591235d6feda184ad9cba0b5601
3
+ size 320044
testset_data/audiocaps/DW3z-ByrfWY.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:149f1afc2bb775a7c99ccb09c8e6e603b2a3ed5dd8c6c93028a9720bb1b4e99f
3
+ size 320044
testset_data/audiocaps/EOaQnfDjVyo.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40b92a82880c5c2ec12da32228887e0a721bd460cd3576ba21ff67bb127d4a8b
3
+ size 320044
testset_data/audiocaps/Fa1c4qfBqzE.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57a9d87e3781a8d74a97e6d9a28127aec0072c7ce34b9d4739f0e1a41cc28ccb
3
+ size 320044
testset_data/audiocaps/HpkPTa1fQDE.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3ae0d62cb8060e0a9101139c96df1867128f79182b35c7edb19b3e57295cdfe
3
+ size 320044
testset_data/audiocaps/KpwdlYIdtfs.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0280771fbe19535d4684992f9d3df3bdbfceebb670a7ebcb2e4834e4920b4843
3
+ size 320044
testset_data/audiocaps/LAHWV6fZwUk.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64ba52fb3c6fe92f0fd689cae48f5d189a00306ceda745004b2d79db7d70a949
3
+ size 320044
testset_data/audiocaps/NSyqj1DXZKg.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ac4e9613525a966dd9da905529621422489ee17a4d789b1e05efefd8e3e01e
3
+ size 320044
testset_data/audiocaps/NzVg-cFQJbE.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bfbd09f9483f395ea7b1a87dc7c3e18242ddf5f262a267ad54cc4241dfbd3db
3
+ size 320044
testset_data/audiocaps/TkclVqlyKx4.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9da3ca97bf3fa79478daccf8eb924062eca8af88dc99845c0827d9cbe78fa7fc
3
+ size 320044
testset_data/audiocaps/VkXLtUx-RmI.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:640f780ea1c043641462342fef56e9db077b0aaed1a636e9dd61b71c0c83d53f
3
+ size 320044
testset_data/audiocaps/XXBVsNt2Qr8.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5100bfe43151f773135294c6fa17a77cc92a1f901cb84c1c5be75358e7618645
3
+ size 320044
testset_data/audiocaps/YwUa3OS92ZQ.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:031d0952cec730868e7ad3c290005ed0fe55470cdb3e029545768b4605fb88c7
3
+ size 320044
testset_data/audiocaps/_gWEpDgPAho.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bec87ac5b40c2d9ebf26118b12d4f90a4f48e3766608ba4ed27f05c63e31c7b
3
+ size 320044
testset_data/audiocaps/_u2cNlW5DxQ.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2bf139665d39580e62359d131938a1f260219a3cc0e1a279f0c107e588fb145
3
+ size 320044
testset_data/audiocaps/bZuXMxR2S4U.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fee3ac0c043fd0e9245b202a92cab10d3b7d3ddb585e9f1e7640e34c93d500d3
3
+ size 320044
testset_data/audiocaps/bl-eQ8XD5CY.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f569b41c49db3364ab5284f6d20c92046afb386501df08fc865e548e58a5cb6e
3
+ size 320044
testset_data/audiocaps/dPCj4WhTZ3c.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f83cbd8264ded5bb2d573d73d318e6fb1a36988b288e96a4ef82312ea8b5b3e9
3
+ size 320044
testset_data/audiocaps/dTVkk4GSmNA.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c79ad7f36214e686c35ae3d234e865862190ed6e76c8d0eec996ff5efe64939f
3
+ size 320044
testset_data/audiocaps/fJ6ZeWYfLjA.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3954307043dd101f3a270cccdfd59a960a4a7c3e5df478881ca0375049a1d87d
3
+ size 320044
testset_data/audiocaps/gW33LYEvoaw.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c12539607e235125526aef365e551a45b8b5212f27f9988b6f9617a20c891039
3
+ size 320044
testset_data/audiocaps/hM88FG1_D5Q.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0babba6248fc32bb99d85fd90e0f3beb3b84fa8d86f45e7f5cf1e28b5a35f3c
3
+ size 320044
testset_data/audiocaps/jqiD3VeM_hY.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8473f523e5cf35621318aa382c47ad5863df18d9409e81c859440e6a928793e2
3
+ size 320044
testset_data/audiocaps/mFcHGbnNtSQ.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:644b53e6bdf01ef750911911acca3b492f04a966418c93072695864a06aefdb1
3
+ size 320044
testset_data/audiocaps/oxZjkE984Uo.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c619b9b340a904e7f3026be08a0741a74c075282aaa7cedf29f6a448a890ae
3
+ size 320044
testset_data/audiocaps/s72505MIhz8.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eea74f52099773a9db2444cae186a025d5950c729b8d4cc604ba18fd0f882de
3
+ size 320044
testset_data/audiocaps/xBDcJKb-9vk.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0d5718a1c5f233d0639a098e008f373724710d856723c3a2b3e5ceeb38628ca
3
+ size 320044
testset_data/audiocaps/xmaVWayBvaY.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:873f54931176cc29d03ca870612ebb6ba1cb55549e3c07cfc8cd30a45d9fe1d8
3
+ size 320044
testset_data/audiocaps/y6iMm7Pltq0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a3092ec17275e2a16280d09d6cc5e27c8c0571f366abdb491ff7844941cc208
3
+ size 320044