diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..349c955624180aeeb00ebb5dae7d72dce47180c2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,50 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +latent_diffusion/my_configs/audioldm_original/infer_07-23-08:28_cfg_scale_3.5_ddim_200_n_cand_3/-0xzrMun0Rs.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/8Bu8CkR1xZY.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/AHrUfa2H_5s.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/XXBVsNt2Qr8.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/3IYd8cCmUkQ.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/hM88FG1_D5Q.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/bl-eQ8XD5CY.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/B90BOtSOD2Q.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/6uIOGE36tWo.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/HpkPTa1fQDE.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/s72505MIhz8.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/2gvyOxKuQPY.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/bZuXMxR2S4U.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/Cy3HWnwMLyI.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/_gWEpDgPAho.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/_u2cNlW5DxQ.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/KpwdlYIdtfs.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/EOaQnfDjVyo.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/dTVkk4GSmNA.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/ByOqw8M2U-Q.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/xmaVWayBvaY.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/LAHWV6fZwUk.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/DW3z-ByrfWY.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/gW33LYEvoaw.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/7_Sr2zv1sQc.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/fJ6ZeWYfLjA.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/TkclVqlyKx4.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/Fa1c4qfBqzE.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/dPCj4WhTZ3c.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/D3f5VIJYR7M.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/3Xoz87_SUdw.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/oxZjkE984Uo.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/0Wdh45yt7tY.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/A3vsvL-Yx0Q.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/2RMOegT2Jn8.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/jqiD3VeM_hY.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/NSyqj1DXZKg.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/NzVg-cFQJbE.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/YwUa3OS92ZQ.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/Bl-lCgr5hGY.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/yrme-KRBvzk.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/mFcHGbnNtSQ.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/y6iMm7Pltq0.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/xBDcJKb-9vk.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/4mtfOkzOvBI.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/05OJDYeHLMc.wav filter=lfs diff=lfs merge=lfs -text +testset_data/audiocaps/VkXLtUx-RmI.wav filter=lfs diff=lfs merge=lfs -text diff --git a/latent_diffusion/my_configs/audioldm_original/audioldm_original.yaml b/latent_diffusion/my_configs/audioldm_original/audioldm_original.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ee2ed2715759557244cdbfb3370a8094a689c95 --- /dev/null +++ b/latent_diffusion/my_configs/audioldm_original/audioldm_original.yaml @@ -0,0 +1,149 @@ +metadata_root: "./data/metadata/dataset_root.json" +log_directory: "./log/latent_diffusion" +project: "audioldm" +precision: "high" + +variables: + sampling_rate: &sampling_rate 16000 + mel_bins: &mel_bins 64 + latent_embed_dim: &latent_embed_dim 8 + latent_t_size: &latent_t_size 256 # TODO might need to change + latent_f_size: &latent_f_size 16 + in_channels: &unet_in_channels 8 + optimize_ddpm_parameter: &optimize_ddpm_parameter true + optimize_gpt: &optimize_gpt true + warmup_steps: &warmup_steps 2000 + +data: + train: ["audiocaps"] + val: "audiocaps" + test: "audiocaps" + class_label_indices: "audioset_eval_subset" + dataloader_add_ons: [] + +step: + validation_every_n_epochs: 5 + save_checkpoint_every_n_steps: 40000 + max_steps: 800000 + save_top_k: 2 + +preprocessing: + audio: + sampling_rate: *sampling_rate + max_wav_value: 32768.0 + duration: 10.24 + stft: + filter_length: 1024 + hop_length: 160 + win_length: 1024 + mel: + n_mel_channels: *mel_bins + mel_fmin: 0 + mel_fmax: 8000 + num_workers: 2 + +augmentation: + mixup: 0.0 + +addition_params: + gradient_accumulation_steps: 1 + +model: + target: audioldm_train.modules.latent_diffusion.ddpm.LatentDiffusion + params: + # Autoencoder + first_stage_config: + base_learning_rate: 8.0e-06 + target: audioldm_train.modules.latent_encoder.autoencoder.AutoencoderKL + params: + reload_from_ckpt: "ckpts/vae_mel_16k_64bins.ckpt" + sampling_rate: *sampling_rate + batchsize: 3 + monitor: val/rec_loss + image_key: fbank + subband: 1 + embed_dim: *latent_embed_dim + time_shuffle: 1 + lossconfig: + target: audioldm_train.losses.LPIPSWithDiscriminator + params: + disc_start: 50001 + kl_weight: 1000.0 + disc_weight: 0.5 + disc_in_channels: 1 + ddconfig: + double_z: true + mel_bins: *mel_bins # The frequency bins of mel spectrogram + z_channels: 8 + resolution: 256 + downsample_time: false + in_channels: 1 + out_ch: 1 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + + # Other parameters + base_learning_rate: 1.0e-4 + warmup_steps: *warmup_steps + optimize_ddpm_parameter: *optimize_ddpm_parameter + sampling_rate: *sampling_rate + batchsize: 1 + linear_start: 0.0015 + linear_end: 0.0195 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + unconditional_prob_cfg: 0.1 + parameterization: eps # [eps, x0, v] + first_stage_key: fbank + latent_t_size: *latent_t_size # TODO might need to change + latent_f_size: *latent_f_size + channels: *latent_embed_dim # TODO might need to change + monitor: val/loss_simple_ema + scale_by_std: true + unet_config: + target: audioldm_train.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 64 + extra_film_condition_dim: 512 # If you use film as extra condition, set this parameter. For example if you have two conditioning vectors each have dimension 512, then this number would be 1024 + # context_dim: + # - 768 + in_channels: *unet_in_channels # The input channel of the UNet model + out_channels: *latent_embed_dim # TODO might need to change + model_channels: 128 # TODO might need to change + attention_resolutions: + - 8 + - 4 + - 2 + num_res_blocks: 2 + channel_mult: + - 1 + - 2 + - 3 + - 5 + num_head_channels: 32 + use_spatial_transformer: true + transformer_depth: 1 + extra_sa_layer: false + + cond_stage_config: + film_clap_cond1: + cond_stage_key: text + conditioning_key: film + target: audioldm_train.conditional_models.CLAPAudioEmbeddingClassifierFreev2 + params: + pretrained_path: ckpts/clap_htsat_tiny.pt + sampling_rate: 16000 + embed_mode: text # or text + amodel: HTSAT-tiny + + evaluation_params: + unconditional_guidance_scale: 3.5 + ddim_sampling_steps: 200 + n_candidates_per_samples: 3 diff --git a/latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=119999.ckpt b/latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=119999.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..c30d42c77631fd12e5460c3d4e9546dd918ff310 --- /dev/null +++ b/latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=119999.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81874520599c1cdc48f87af9a3d41ce228de409b9a31eae73e2c218819ee7feb +size 4910952259 diff --git a/latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=79999.ckpt b/latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=79999.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..e33e0d15fe112ad89933b64c5525d757f9492612 --- /dev/null +++ b/latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=79999.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8938853f65a559040b00d8164b58aa541396d807b5a6e3ba8c8ab9ef5285a9 +size 4910952259 diff --git a/latent_diffusion/my_configs/audioldm_original/infer_07-23-08:28_cfg_scale_3.5_ddim_200_n_cand_3/-0xzrMun0Rs.wav b/latent_diffusion/my_configs/audioldm_original/infer_07-23-08:28_cfg_scale_3.5_ddim_200_n_cand_3/-0xzrMun0Rs.wav new file mode 100644 index 0000000000000000000000000000000000000000..0a09c9dbc9d738fed825e645360baff636f60654 --- /dev/null +++ b/latent_diffusion/my_configs/audioldm_original/infer_07-23-08:28_cfg_scale_3.5_ddim_200_n_cand_3/-0xzrMun0Rs.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325f313d578e4057eab5474cae4744464936581e7b71446bfbb003f96fc343eb +size 327788 diff --git a/testset_data/audiocaps/05OJDYeHLMc.wav b/testset_data/audiocaps/05OJDYeHLMc.wav new file mode 100644 index 0000000000000000000000000000000000000000..0ce2ac83b5d58b109b57182f140caf080069b8ae --- /dev/null +++ b/testset_data/audiocaps/05OJDYeHLMc.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdb94d0d33f4b21375b3bef8d2ffd4ea063e4ad1bf08751cceacdb3ae3e7c400 +size 320044 diff --git a/testset_data/audiocaps/0Wdh45yt7tY.wav b/testset_data/audiocaps/0Wdh45yt7tY.wav new file mode 100644 index 0000000000000000000000000000000000000000..6b62b1aaddfa24096fdc96f29928281e23eb8a09 --- /dev/null +++ b/testset_data/audiocaps/0Wdh45yt7tY.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f01912fdf5fae71dde63f09749570faf29c16ff8b289e3e23f88a681890cc45 +size 320044 diff --git a/testset_data/audiocaps/2RMOegT2Jn8.wav b/testset_data/audiocaps/2RMOegT2Jn8.wav new file mode 100644 index 0000000000000000000000000000000000000000..b1f5cbbe6aea276bbb63116f9abdfd5d13ef0a36 --- /dev/null +++ b/testset_data/audiocaps/2RMOegT2Jn8.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:020e2365d24b30c3ef632e9195bbe9dd56b643e6691bd3a6b44572df3bee92b2 +size 320044 diff --git a/testset_data/audiocaps/2gvyOxKuQPY.wav b/testset_data/audiocaps/2gvyOxKuQPY.wav new file mode 100644 index 0000000000000000000000000000000000000000..32c271a7f0de50d19e2c33d11b54deb554f15949 --- /dev/null +++ b/testset_data/audiocaps/2gvyOxKuQPY.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef302a002971776b76428d76769766ae847f291f1ace33099b9e225f5ada710b +size 320044 diff --git a/testset_data/audiocaps/3IYd8cCmUkQ.wav b/testset_data/audiocaps/3IYd8cCmUkQ.wav new file mode 100644 index 0000000000000000000000000000000000000000..81bc0b159de350989ca3911f7b6aeb2f805d59ea --- /dev/null +++ b/testset_data/audiocaps/3IYd8cCmUkQ.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85fb31647ed1e05f2eaf7bdede7f9097ee0cabad07fec6168b6d7154e102a5a6 +size 320044 diff --git a/testset_data/audiocaps/3Xoz87_SUdw.wav b/testset_data/audiocaps/3Xoz87_SUdw.wav new file mode 100644 index 0000000000000000000000000000000000000000..849eecbfcd924454ef922ce77645b18fd36eb60d --- /dev/null +++ b/testset_data/audiocaps/3Xoz87_SUdw.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee133da9f25dcb3a20a0299f732a39208be2c6f2546a5b14b5973db9f858b2f6 +size 320044 diff --git a/testset_data/audiocaps/4mtfOkzOvBI.wav b/testset_data/audiocaps/4mtfOkzOvBI.wav new file mode 100644 index 0000000000000000000000000000000000000000..2aa83bb2bcca76bea3d4d92c1d1b79d965b8c324 --- /dev/null +++ b/testset_data/audiocaps/4mtfOkzOvBI.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b245df371a41e0949fc0ef1181efaf1438e610d76348aa174303f19eb8a53065 +size 320044 diff --git a/testset_data/audiocaps/6uIOGE36tWo.wav b/testset_data/audiocaps/6uIOGE36tWo.wav new file mode 100644 index 0000000000000000000000000000000000000000..c3aafe3dc7e8ffb47d8d82064a415436faadb417 --- /dev/null +++ b/testset_data/audiocaps/6uIOGE36tWo.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be77f7f2f24d4fa6b1b32e9b6bf78d3d391ec1ad9a0a3cc65e2f6c57ca497845 +size 320044 diff --git a/testset_data/audiocaps/7_Sr2zv1sQc.wav b/testset_data/audiocaps/7_Sr2zv1sQc.wav new file mode 100644 index 0000000000000000000000000000000000000000..d5bf13f3f8933fc3f76e88988d9c2ada57ecab27 --- /dev/null +++ b/testset_data/audiocaps/7_Sr2zv1sQc.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d1b94abf9d28803a5744b60e4de72bbe462aa817261ba8c3ee7a503d06a0db +size 320044 diff --git a/testset_data/audiocaps/8Bu8CkR1xZY.wav b/testset_data/audiocaps/8Bu8CkR1xZY.wav new file mode 100644 index 0000000000000000000000000000000000000000..03dc5d908e882a38bedbf7dfcbef0fa10c973ee6 --- /dev/null +++ b/testset_data/audiocaps/8Bu8CkR1xZY.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f10d6da1e05c7978afa1b028fc755fe1d23d3796263d3878eb3ba83aaa085d +size 320044 diff --git a/testset_data/audiocaps/A3vsvL-Yx0Q.wav b/testset_data/audiocaps/A3vsvL-Yx0Q.wav new file mode 100644 index 0000000000000000000000000000000000000000..2a3ced44bb20375dc3cc88335eb8ccce782d1269 --- /dev/null +++ b/testset_data/audiocaps/A3vsvL-Yx0Q.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f00c60d9e5e2b9071e8f2f898adaf78f7d392acf7274bf9b6738fffead31bf27 +size 320044 diff --git a/testset_data/audiocaps/AHrUfa2H_5s.wav b/testset_data/audiocaps/AHrUfa2H_5s.wav new file mode 100644 index 0000000000000000000000000000000000000000..8e3a012626553d2118d1329748c324bb7a36a010 --- /dev/null +++ b/testset_data/audiocaps/AHrUfa2H_5s.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5da7c879adac1e1624eb3483073f493fd0c837eadd497a45561850c5dad71eb5 +size 320044 diff --git a/testset_data/audiocaps/B90BOtSOD2Q.wav b/testset_data/audiocaps/B90BOtSOD2Q.wav new file mode 100644 index 0000000000000000000000000000000000000000..ae6df4ae81e3783daf434ba2063a3468e449bdfa --- /dev/null +++ b/testset_data/audiocaps/B90BOtSOD2Q.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54ef7017a184ee6c893bae9b484361e2c8d4acfabadfccca83ff7398fb5a8a22 +size 320044 diff --git a/testset_data/audiocaps/Bl-lCgr5hGY.wav b/testset_data/audiocaps/Bl-lCgr5hGY.wav new file mode 100644 index 0000000000000000000000000000000000000000..5e2c6e59173aede1b6e87d1d0e96d98f666e0b42 --- /dev/null +++ b/testset_data/audiocaps/Bl-lCgr5hGY.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ad3a9721ab468d3abecd3bfb33a04806dd7e83bf3ce60ad8eb1c5b2190722e +size 320044 diff --git a/testset_data/audiocaps/ByOqw8M2U-Q.wav b/testset_data/audiocaps/ByOqw8M2U-Q.wav new file mode 100644 index 0000000000000000000000000000000000000000..7befca7f2e684e295cc6efed2db1a89b3bb3ce55 --- /dev/null +++ b/testset_data/audiocaps/ByOqw8M2U-Q.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9dbbacf98fac39663c9ed5fb0b58302884c0ecb1375cdbc4cb18e6b8fd8e98c +size 320044 diff --git a/testset_data/audiocaps/Cy3HWnwMLyI.wav b/testset_data/audiocaps/Cy3HWnwMLyI.wav new file mode 100644 index 0000000000000000000000000000000000000000..8dc810d2e4479e56c4ef34435a59387d9e5a00ff --- /dev/null +++ b/testset_data/audiocaps/Cy3HWnwMLyI.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccde8b6db494aafb5eae8bd2a69a6f9b381073b9ff242a89a9169ea249692a6c +size 320044 diff --git a/testset_data/audiocaps/D3f5VIJYR7M.wav b/testset_data/audiocaps/D3f5VIJYR7M.wav new file mode 100644 index 0000000000000000000000000000000000000000..7b78c6f3a46bf755615eeee4bbeef28213ea099d --- /dev/null +++ b/testset_data/audiocaps/D3f5VIJYR7M.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0bf1d4c2bd7315270f311e409eb8e32e37bf591235d6feda184ad9cba0b5601 +size 320044 diff --git a/testset_data/audiocaps/DW3z-ByrfWY.wav b/testset_data/audiocaps/DW3z-ByrfWY.wav new file mode 100644 index 0000000000000000000000000000000000000000..e984538ff72a5369a65392078c3717c827f57fb0 --- /dev/null +++ b/testset_data/audiocaps/DW3z-ByrfWY.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:149f1afc2bb775a7c99ccb09c8e6e603b2a3ed5dd8c6c93028a9720bb1b4e99f +size 320044 diff --git a/testset_data/audiocaps/EOaQnfDjVyo.wav b/testset_data/audiocaps/EOaQnfDjVyo.wav new file mode 100644 index 0000000000000000000000000000000000000000..8d80abe104fd337351d4a6c28fb3d4901dfc3b79 --- /dev/null +++ b/testset_data/audiocaps/EOaQnfDjVyo.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b92a82880c5c2ec12da32228887e0a721bd460cd3576ba21ff67bb127d4a8b +size 320044 diff --git a/testset_data/audiocaps/Fa1c4qfBqzE.wav b/testset_data/audiocaps/Fa1c4qfBqzE.wav new file mode 100644 index 0000000000000000000000000000000000000000..87eab8bf68e2dd6a0bcbb22af54780e589d69eed --- /dev/null +++ b/testset_data/audiocaps/Fa1c4qfBqzE.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a9d87e3781a8d74a97e6d9a28127aec0072c7ce34b9d4739f0e1a41cc28ccb +size 320044 diff --git a/testset_data/audiocaps/HpkPTa1fQDE.wav b/testset_data/audiocaps/HpkPTa1fQDE.wav new file mode 100644 index 0000000000000000000000000000000000000000..8e3b6b92a6ee154f4518548aaad37707515c4c02 --- /dev/null +++ b/testset_data/audiocaps/HpkPTa1fQDE.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ae0d62cb8060e0a9101139c96df1867128f79182b35c7edb19b3e57295cdfe +size 320044 diff --git a/testset_data/audiocaps/KpwdlYIdtfs.wav b/testset_data/audiocaps/KpwdlYIdtfs.wav new file mode 100644 index 0000000000000000000000000000000000000000..9cb92bfdf27e45ad78d2c53603419b6c62f1cde9 --- /dev/null +++ b/testset_data/audiocaps/KpwdlYIdtfs.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0280771fbe19535d4684992f9d3df3bdbfceebb670a7ebcb2e4834e4920b4843 +size 320044 diff --git a/testset_data/audiocaps/LAHWV6fZwUk.wav b/testset_data/audiocaps/LAHWV6fZwUk.wav new file mode 100644 index 0000000000000000000000000000000000000000..907e0a1aa206fe9d6201f0406cc5a2e10a93ebd5 --- /dev/null +++ b/testset_data/audiocaps/LAHWV6fZwUk.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ba52fb3c6fe92f0fd689cae48f5d189a00306ceda745004b2d79db7d70a949 +size 320044 diff --git a/testset_data/audiocaps/NSyqj1DXZKg.wav b/testset_data/audiocaps/NSyqj1DXZKg.wav new file mode 100644 index 0000000000000000000000000000000000000000..fa486eb4458cc47f70650287b938e927ec2262f5 --- /dev/null +++ b/testset_data/audiocaps/NSyqj1DXZKg.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ac4e9613525a966dd9da905529621422489ee17a4d789b1e05efefd8e3e01e +size 320044 diff --git a/testset_data/audiocaps/NzVg-cFQJbE.wav b/testset_data/audiocaps/NzVg-cFQJbE.wav new file mode 100644 index 0000000000000000000000000000000000000000..cb3b8eddc3c611b217487ff45e5581945a863f80 --- /dev/null +++ b/testset_data/audiocaps/NzVg-cFQJbE.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bfbd09f9483f395ea7b1a87dc7c3e18242ddf5f262a267ad54cc4241dfbd3db +size 320044 diff --git a/testset_data/audiocaps/TkclVqlyKx4.wav b/testset_data/audiocaps/TkclVqlyKx4.wav new file mode 100644 index 0000000000000000000000000000000000000000..10853facbc0ecd956ca053682a56d8c340803582 --- /dev/null +++ b/testset_data/audiocaps/TkclVqlyKx4.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da3ca97bf3fa79478daccf8eb924062eca8af88dc99845c0827d9cbe78fa7fc +size 320044 diff --git a/testset_data/audiocaps/VkXLtUx-RmI.wav b/testset_data/audiocaps/VkXLtUx-RmI.wav new file mode 100644 index 0000000000000000000000000000000000000000..336396180d9b8d7ed9936e953508112f2805d7df --- /dev/null +++ b/testset_data/audiocaps/VkXLtUx-RmI.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:640f780ea1c043641462342fef56e9db077b0aaed1a636e9dd61b71c0c83d53f +size 320044 diff --git a/testset_data/audiocaps/XXBVsNt2Qr8.wav b/testset_data/audiocaps/XXBVsNt2Qr8.wav new file mode 100644 index 0000000000000000000000000000000000000000..66debb4341afc25b72501a8adeb9b4b97b63e047 --- /dev/null +++ b/testset_data/audiocaps/XXBVsNt2Qr8.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5100bfe43151f773135294c6fa17a77cc92a1f901cb84c1c5be75358e7618645 +size 320044 diff --git a/testset_data/audiocaps/YwUa3OS92ZQ.wav b/testset_data/audiocaps/YwUa3OS92ZQ.wav new file mode 100644 index 0000000000000000000000000000000000000000..7d8c68a054750aa068a5638979dc9f7793490e5a --- /dev/null +++ b/testset_data/audiocaps/YwUa3OS92ZQ.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031d0952cec730868e7ad3c290005ed0fe55470cdb3e029545768b4605fb88c7 +size 320044 diff --git a/testset_data/audiocaps/_gWEpDgPAho.wav b/testset_data/audiocaps/_gWEpDgPAho.wav new file mode 100644 index 0000000000000000000000000000000000000000..e66bf3e013f86ceb2629eddea215070c515713b5 --- /dev/null +++ b/testset_data/audiocaps/_gWEpDgPAho.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bec87ac5b40c2d9ebf26118b12d4f90a4f48e3766608ba4ed27f05c63e31c7b +size 320044 diff --git a/testset_data/audiocaps/_u2cNlW5DxQ.wav b/testset_data/audiocaps/_u2cNlW5DxQ.wav new file mode 100644 index 0000000000000000000000000000000000000000..fab20d05c0500bd0d00538e0e8c3550836691b3c --- /dev/null +++ b/testset_data/audiocaps/_u2cNlW5DxQ.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2bf139665d39580e62359d131938a1f260219a3cc0e1a279f0c107e588fb145 +size 320044 diff --git a/testset_data/audiocaps/bZuXMxR2S4U.wav b/testset_data/audiocaps/bZuXMxR2S4U.wav new file mode 100644 index 0000000000000000000000000000000000000000..f2e54cfc6bdbc885aecf9503541cbdba11677098 --- /dev/null +++ b/testset_data/audiocaps/bZuXMxR2S4U.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee3ac0c043fd0e9245b202a92cab10d3b7d3ddb585e9f1e7640e34c93d500d3 +size 320044 diff --git a/testset_data/audiocaps/bl-eQ8XD5CY.wav b/testset_data/audiocaps/bl-eQ8XD5CY.wav new file mode 100644 index 0000000000000000000000000000000000000000..174d1d24f36c1336ae0dfc92dc5056fbb960f4b3 --- /dev/null +++ b/testset_data/audiocaps/bl-eQ8XD5CY.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f569b41c49db3364ab5284f6d20c92046afb386501df08fc865e548e58a5cb6e +size 320044 diff --git a/testset_data/audiocaps/dPCj4WhTZ3c.wav b/testset_data/audiocaps/dPCj4WhTZ3c.wav new file mode 100644 index 0000000000000000000000000000000000000000..6ef746ccfa4640f90d99f9b11972ee58a77b9521 --- /dev/null +++ b/testset_data/audiocaps/dPCj4WhTZ3c.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f83cbd8264ded5bb2d573d73d318e6fb1a36988b288e96a4ef82312ea8b5b3e9 +size 320044 diff --git a/testset_data/audiocaps/dTVkk4GSmNA.wav b/testset_data/audiocaps/dTVkk4GSmNA.wav new file mode 100644 index 0000000000000000000000000000000000000000..34437bfa4b6105fd3c636c56169852a83598c878 --- /dev/null +++ b/testset_data/audiocaps/dTVkk4GSmNA.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c79ad7f36214e686c35ae3d234e865862190ed6e76c8d0eec996ff5efe64939f +size 320044 diff --git a/testset_data/audiocaps/fJ6ZeWYfLjA.wav b/testset_data/audiocaps/fJ6ZeWYfLjA.wav new file mode 100644 index 0000000000000000000000000000000000000000..f08d11053b106febbc4a81e2e39c4aec696d375b --- /dev/null +++ b/testset_data/audiocaps/fJ6ZeWYfLjA.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3954307043dd101f3a270cccdfd59a960a4a7c3e5df478881ca0375049a1d87d +size 320044 diff --git a/testset_data/audiocaps/gW33LYEvoaw.wav b/testset_data/audiocaps/gW33LYEvoaw.wav new file mode 100644 index 0000000000000000000000000000000000000000..251e5876cb6e5a89879c3e11f98dc4499cbb7754 --- /dev/null +++ b/testset_data/audiocaps/gW33LYEvoaw.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c12539607e235125526aef365e551a45b8b5212f27f9988b6f9617a20c891039 +size 320044 diff --git a/testset_data/audiocaps/hM88FG1_D5Q.wav b/testset_data/audiocaps/hM88FG1_D5Q.wav new file mode 100644 index 0000000000000000000000000000000000000000..70713fa99d96732b49a1a68cacef6b9c6c4c178b --- /dev/null +++ b/testset_data/audiocaps/hM88FG1_D5Q.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0babba6248fc32bb99d85fd90e0f3beb3b84fa8d86f45e7f5cf1e28b5a35f3c +size 320044 diff --git a/testset_data/audiocaps/jqiD3VeM_hY.wav b/testset_data/audiocaps/jqiD3VeM_hY.wav new file mode 100644 index 0000000000000000000000000000000000000000..a71c7f358dafd6384a42f81eedf8dc742d23b3b3 --- /dev/null +++ b/testset_data/audiocaps/jqiD3VeM_hY.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8473f523e5cf35621318aa382c47ad5863df18d9409e81c859440e6a928793e2 +size 320044 diff --git a/testset_data/audiocaps/mFcHGbnNtSQ.wav b/testset_data/audiocaps/mFcHGbnNtSQ.wav new file mode 100644 index 0000000000000000000000000000000000000000..899c7bfcc2c17e6d461ce6d3a182d00863d772f1 --- /dev/null +++ b/testset_data/audiocaps/mFcHGbnNtSQ.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644b53e6bdf01ef750911911acca3b492f04a966418c93072695864a06aefdb1 +size 320044 diff --git a/testset_data/audiocaps/oxZjkE984Uo.wav b/testset_data/audiocaps/oxZjkE984Uo.wav new file mode 100644 index 0000000000000000000000000000000000000000..42ae1132b256b0c4c9bc46f68997aa2c3f03e405 --- /dev/null +++ b/testset_data/audiocaps/oxZjkE984Uo.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c619b9b340a904e7f3026be08a0741a74c075282aaa7cedf29f6a448a890ae +size 320044 diff --git a/testset_data/audiocaps/s72505MIhz8.wav b/testset_data/audiocaps/s72505MIhz8.wav new file mode 100644 index 0000000000000000000000000000000000000000..d4c95d825c9764d872d8460f4c7e3b92bfbcccc8 --- /dev/null +++ b/testset_data/audiocaps/s72505MIhz8.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eea74f52099773a9db2444cae186a025d5950c729b8d4cc604ba18fd0f882de +size 320044 diff --git a/testset_data/audiocaps/xBDcJKb-9vk.wav b/testset_data/audiocaps/xBDcJKb-9vk.wav new file mode 100644 index 0000000000000000000000000000000000000000..27717de1cbadc8b1622ebeb33386ce045b4f03dd --- /dev/null +++ b/testset_data/audiocaps/xBDcJKb-9vk.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0d5718a1c5f233d0639a098e008f373724710d856723c3a2b3e5ceeb38628ca +size 320044 diff --git a/testset_data/audiocaps/xmaVWayBvaY.wav b/testset_data/audiocaps/xmaVWayBvaY.wav new file mode 100644 index 0000000000000000000000000000000000000000..261f2eaa3163de723a34b7451f0eb927e00ce700 --- /dev/null +++ b/testset_data/audiocaps/xmaVWayBvaY.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:873f54931176cc29d03ca870612ebb6ba1cb55549e3c07cfc8cd30a45d9fe1d8 +size 320044 diff --git a/testset_data/audiocaps/y6iMm7Pltq0.wav b/testset_data/audiocaps/y6iMm7Pltq0.wav new file mode 100644 index 0000000000000000000000000000000000000000..5615ea1d626fc7e2c0caef23accb231074d426b2 --- /dev/null +++ b/testset_data/audiocaps/y6iMm7Pltq0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3092ec17275e2a16280d09d6cc5e27c8c0571f366abdb491ff7844941cc208 +size 320044 diff --git a/testset_data/audiocaps/yrme-KRBvzk.wav b/testset_data/audiocaps/yrme-KRBvzk.wav new file mode 100644 index 0000000000000000000000000000000000000000..ad0f2a05187b8df50161c21eade8ab58b8a304a1 --- /dev/null +++ b/testset_data/audiocaps/yrme-KRBvzk.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:994c75b2ad6cdd0b21f5d0e08177c0e61b7bd5837acf88491bb1a9ec5ab6a8d3 +size 320044