Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +47 -0
- latent_diffusion/my_configs/audioldm_original/audioldm_original.yaml +149 -0
- latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=119999.ckpt +3 -0
- latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=79999.ckpt +3 -0
- latent_diffusion/my_configs/audioldm_original/infer_07-23-08:28_cfg_scale_3.5_ddim_200_n_cand_3/-0xzrMun0Rs.wav +3 -0
- testset_data/audiocaps/05OJDYeHLMc.wav +3 -0
- testset_data/audiocaps/0Wdh45yt7tY.wav +3 -0
- testset_data/audiocaps/2RMOegT2Jn8.wav +3 -0
- testset_data/audiocaps/2gvyOxKuQPY.wav +3 -0
- testset_data/audiocaps/3IYd8cCmUkQ.wav +3 -0
- testset_data/audiocaps/3Xoz87_SUdw.wav +3 -0
- testset_data/audiocaps/4mtfOkzOvBI.wav +3 -0
- testset_data/audiocaps/6uIOGE36tWo.wav +3 -0
- testset_data/audiocaps/7_Sr2zv1sQc.wav +3 -0
- testset_data/audiocaps/8Bu8CkR1xZY.wav +3 -0
- testset_data/audiocaps/A3vsvL-Yx0Q.wav +3 -0
- testset_data/audiocaps/AHrUfa2H_5s.wav +3 -0
- testset_data/audiocaps/B90BOtSOD2Q.wav +3 -0
- testset_data/audiocaps/Bl-lCgr5hGY.wav +3 -0
- testset_data/audiocaps/ByOqw8M2U-Q.wav +3 -0
- testset_data/audiocaps/Cy3HWnwMLyI.wav +3 -0
- testset_data/audiocaps/D3f5VIJYR7M.wav +3 -0
- testset_data/audiocaps/DW3z-ByrfWY.wav +3 -0
- testset_data/audiocaps/EOaQnfDjVyo.wav +3 -0
- testset_data/audiocaps/Fa1c4qfBqzE.wav +3 -0
- testset_data/audiocaps/HpkPTa1fQDE.wav +3 -0
- testset_data/audiocaps/KpwdlYIdtfs.wav +3 -0
- testset_data/audiocaps/LAHWV6fZwUk.wav +3 -0
- testset_data/audiocaps/NSyqj1DXZKg.wav +3 -0
- testset_data/audiocaps/NzVg-cFQJbE.wav +3 -0
- testset_data/audiocaps/TkclVqlyKx4.wav +3 -0
- testset_data/audiocaps/VkXLtUx-RmI.wav +3 -0
- testset_data/audiocaps/XXBVsNt2Qr8.wav +3 -0
- testset_data/audiocaps/YwUa3OS92ZQ.wav +3 -0
- testset_data/audiocaps/_gWEpDgPAho.wav +3 -0
- testset_data/audiocaps/_u2cNlW5DxQ.wav +3 -0
- testset_data/audiocaps/bZuXMxR2S4U.wav +3 -0
- testset_data/audiocaps/bl-eQ8XD5CY.wav +3 -0
- testset_data/audiocaps/dPCj4WhTZ3c.wav +3 -0
- testset_data/audiocaps/dTVkk4GSmNA.wav +3 -0
- testset_data/audiocaps/fJ6ZeWYfLjA.wav +3 -0
- testset_data/audiocaps/gW33LYEvoaw.wav +3 -0
- testset_data/audiocaps/hM88FG1_D5Q.wav +3 -0
- testset_data/audiocaps/jqiD3VeM_hY.wav +3 -0
- testset_data/audiocaps/mFcHGbnNtSQ.wav +3 -0
- testset_data/audiocaps/oxZjkE984Uo.wav +3 -0
- testset_data/audiocaps/s72505MIhz8.wav +3 -0
- testset_data/audiocaps/xBDcJKb-9vk.wav +3 -0
- testset_data/audiocaps/xmaVWayBvaY.wav +3 -0
- testset_data/audiocaps/y6iMm7Pltq0.wav +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,50 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
latent_diffusion/my_configs/audioldm_original/infer_07-23-08:28_cfg_scale_3.5_ddim_200_n_cand_3/-0xzrMun0Rs.wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
testset_data/audiocaps/8Bu8CkR1xZY.wav filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
testset_data/audiocaps/AHrUfa2H_5s.wav filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
testset_data/audiocaps/XXBVsNt2Qr8.wav filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
testset_data/audiocaps/3IYd8cCmUkQ.wav filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
testset_data/audiocaps/hM88FG1_D5Q.wav filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
testset_data/audiocaps/bl-eQ8XD5CY.wav filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
testset_data/audiocaps/B90BOtSOD2Q.wav filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
testset_data/audiocaps/6uIOGE36tWo.wav filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
testset_data/audiocaps/HpkPTa1fQDE.wav filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
testset_data/audiocaps/s72505MIhz8.wav filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
testset_data/audiocaps/2gvyOxKuQPY.wav filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
testset_data/audiocaps/bZuXMxR2S4U.wav filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
testset_data/audiocaps/Cy3HWnwMLyI.wav filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
testset_data/audiocaps/_gWEpDgPAho.wav filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
testset_data/audiocaps/_u2cNlW5DxQ.wav filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
testset_data/audiocaps/KpwdlYIdtfs.wav filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
testset_data/audiocaps/EOaQnfDjVyo.wav filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
testset_data/audiocaps/dTVkk4GSmNA.wav filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
testset_data/audiocaps/ByOqw8M2U-Q.wav filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
testset_data/audiocaps/xmaVWayBvaY.wav filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
testset_data/audiocaps/LAHWV6fZwUk.wav filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
testset_data/audiocaps/DW3z-ByrfWY.wav filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
testset_data/audiocaps/gW33LYEvoaw.wav filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
testset_data/audiocaps/7_Sr2zv1sQc.wav filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
testset_data/audiocaps/fJ6ZeWYfLjA.wav filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
testset_data/audiocaps/TkclVqlyKx4.wav filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
testset_data/audiocaps/Fa1c4qfBqzE.wav filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
testset_data/audiocaps/dPCj4WhTZ3c.wav filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
testset_data/audiocaps/D3f5VIJYR7M.wav filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
testset_data/audiocaps/3Xoz87_SUdw.wav filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
testset_data/audiocaps/oxZjkE984Uo.wav filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
testset_data/audiocaps/0Wdh45yt7tY.wav filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
testset_data/audiocaps/A3vsvL-Yx0Q.wav filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
testset_data/audiocaps/2RMOegT2Jn8.wav filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
testset_data/audiocaps/jqiD3VeM_hY.wav filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
testset_data/audiocaps/NSyqj1DXZKg.wav filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
testset_data/audiocaps/NzVg-cFQJbE.wav filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
testset_data/audiocaps/YwUa3OS92ZQ.wav filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
testset_data/audiocaps/Bl-lCgr5hGY.wav filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
testset_data/audiocaps/yrme-KRBvzk.wav filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
testset_data/audiocaps/mFcHGbnNtSQ.wav filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
testset_data/audiocaps/y6iMm7Pltq0.wav filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
testset_data/audiocaps/xBDcJKb-9vk.wav filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
testset_data/audiocaps/4mtfOkzOvBI.wav filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
testset_data/audiocaps/05OJDYeHLMc.wav filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
testset_data/audiocaps/VkXLtUx-RmI.wav filter=lfs diff=lfs merge=lfs -text
|
latent_diffusion/my_configs/audioldm_original/audioldm_original.yaml
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
metadata_root: "./data/metadata/dataset_root.json"
|
| 2 |
+
log_directory: "./log/latent_diffusion"
|
| 3 |
+
project: "audioldm"
|
| 4 |
+
precision: "high"
|
| 5 |
+
|
| 6 |
+
variables:
|
| 7 |
+
sampling_rate: &sampling_rate 16000
|
| 8 |
+
mel_bins: &mel_bins 64
|
| 9 |
+
latent_embed_dim: &latent_embed_dim 8
|
| 10 |
+
latent_t_size: &latent_t_size 256 # TODO might need to change
|
| 11 |
+
latent_f_size: &latent_f_size 16
|
| 12 |
+
in_channels: &unet_in_channels 8
|
| 13 |
+
optimize_ddpm_parameter: &optimize_ddpm_parameter true
|
| 14 |
+
optimize_gpt: &optimize_gpt true
|
| 15 |
+
warmup_steps: &warmup_steps 2000
|
| 16 |
+
|
| 17 |
+
data:
|
| 18 |
+
train: ["audiocaps"]
|
| 19 |
+
val: "audiocaps"
|
| 20 |
+
test: "audiocaps"
|
| 21 |
+
class_label_indices: "audioset_eval_subset"
|
| 22 |
+
dataloader_add_ons: []
|
| 23 |
+
|
| 24 |
+
step:
|
| 25 |
+
validation_every_n_epochs: 5
|
| 26 |
+
save_checkpoint_every_n_steps: 40000
|
| 27 |
+
max_steps: 800000
|
| 28 |
+
save_top_k: 2
|
| 29 |
+
|
| 30 |
+
preprocessing:
|
| 31 |
+
audio:
|
| 32 |
+
sampling_rate: *sampling_rate
|
| 33 |
+
max_wav_value: 32768.0
|
| 34 |
+
duration: 10.24
|
| 35 |
+
stft:
|
| 36 |
+
filter_length: 1024
|
| 37 |
+
hop_length: 160
|
| 38 |
+
win_length: 1024
|
| 39 |
+
mel:
|
| 40 |
+
n_mel_channels: *mel_bins
|
| 41 |
+
mel_fmin: 0
|
| 42 |
+
mel_fmax: 8000
|
| 43 |
+
num_workers: 2
|
| 44 |
+
|
| 45 |
+
augmentation:
|
| 46 |
+
mixup: 0.0
|
| 47 |
+
|
| 48 |
+
addition_params:
|
| 49 |
+
gradient_accumulation_steps: 1
|
| 50 |
+
|
| 51 |
+
model:
|
| 52 |
+
target: audioldm_train.modules.latent_diffusion.ddpm.LatentDiffusion
|
| 53 |
+
params:
|
| 54 |
+
# Autoencoder
|
| 55 |
+
first_stage_config:
|
| 56 |
+
base_learning_rate: 8.0e-06
|
| 57 |
+
target: audioldm_train.modules.latent_encoder.autoencoder.AutoencoderKL
|
| 58 |
+
params:
|
| 59 |
+
reload_from_ckpt: "ckpts/vae_mel_16k_64bins.ckpt"
|
| 60 |
+
sampling_rate: *sampling_rate
|
| 61 |
+
batchsize: 3
|
| 62 |
+
monitor: val/rec_loss
|
| 63 |
+
image_key: fbank
|
| 64 |
+
subband: 1
|
| 65 |
+
embed_dim: *latent_embed_dim
|
| 66 |
+
time_shuffle: 1
|
| 67 |
+
lossconfig:
|
| 68 |
+
target: audioldm_train.losses.LPIPSWithDiscriminator
|
| 69 |
+
params:
|
| 70 |
+
disc_start: 50001
|
| 71 |
+
kl_weight: 1000.0
|
| 72 |
+
disc_weight: 0.5
|
| 73 |
+
disc_in_channels: 1
|
| 74 |
+
ddconfig:
|
| 75 |
+
double_z: true
|
| 76 |
+
mel_bins: *mel_bins # The frequency bins of mel spectrogram
|
| 77 |
+
z_channels: 8
|
| 78 |
+
resolution: 256
|
| 79 |
+
downsample_time: false
|
| 80 |
+
in_channels: 1
|
| 81 |
+
out_ch: 1
|
| 82 |
+
ch: 128
|
| 83 |
+
ch_mult:
|
| 84 |
+
- 1
|
| 85 |
+
- 2
|
| 86 |
+
- 4
|
| 87 |
+
num_res_blocks: 2
|
| 88 |
+
attn_resolutions: []
|
| 89 |
+
dropout: 0.0
|
| 90 |
+
|
| 91 |
+
# Other parameters
|
| 92 |
+
base_learning_rate: 1.0e-4
|
| 93 |
+
warmup_steps: *warmup_steps
|
| 94 |
+
optimize_ddpm_parameter: *optimize_ddpm_parameter
|
| 95 |
+
sampling_rate: *sampling_rate
|
| 96 |
+
batchsize: 1
|
| 97 |
+
linear_start: 0.0015
|
| 98 |
+
linear_end: 0.0195
|
| 99 |
+
num_timesteps_cond: 1
|
| 100 |
+
log_every_t: 200
|
| 101 |
+
timesteps: 1000
|
| 102 |
+
unconditional_prob_cfg: 0.1
|
| 103 |
+
parameterization: eps # [eps, x0, v]
|
| 104 |
+
first_stage_key: fbank
|
| 105 |
+
latent_t_size: *latent_t_size # TODO might need to change
|
| 106 |
+
latent_f_size: *latent_f_size
|
| 107 |
+
channels: *latent_embed_dim # TODO might need to change
|
| 108 |
+
monitor: val/loss_simple_ema
|
| 109 |
+
scale_by_std: true
|
| 110 |
+
unet_config:
|
| 111 |
+
target: audioldm_train.modules.diffusionmodules.openaimodel.UNetModel
|
| 112 |
+
params:
|
| 113 |
+
image_size: 64
|
| 114 |
+
extra_film_condition_dim: 512 # If you use film as extra condition, set this parameter. For example if you have two conditioning vectors each have dimension 512, then this number would be 1024
|
| 115 |
+
# context_dim:
|
| 116 |
+
# - 768
|
| 117 |
+
in_channels: *unet_in_channels # The input channel of the UNet model
|
| 118 |
+
out_channels: *latent_embed_dim # TODO might need to change
|
| 119 |
+
model_channels: 128 # TODO might need to change
|
| 120 |
+
attention_resolutions:
|
| 121 |
+
- 8
|
| 122 |
+
- 4
|
| 123 |
+
- 2
|
| 124 |
+
num_res_blocks: 2
|
| 125 |
+
channel_mult:
|
| 126 |
+
- 1
|
| 127 |
+
- 2
|
| 128 |
+
- 3
|
| 129 |
+
- 5
|
| 130 |
+
num_head_channels: 32
|
| 131 |
+
use_spatial_transformer: true
|
| 132 |
+
transformer_depth: 1
|
| 133 |
+
extra_sa_layer: false
|
| 134 |
+
|
| 135 |
+
cond_stage_config:
|
| 136 |
+
film_clap_cond1:
|
| 137 |
+
cond_stage_key: text
|
| 138 |
+
conditioning_key: film
|
| 139 |
+
target: audioldm_train.conditional_models.CLAPAudioEmbeddingClassifierFreev2
|
| 140 |
+
params:
|
| 141 |
+
pretrained_path: ckpts/clap_htsat_tiny.pt
|
| 142 |
+
sampling_rate: 16000
|
| 143 |
+
embed_mode: text # or audio
|
| 144 |
+
amodel: HTSAT-tiny
|
| 145 |
+
|
| 146 |
+
evaluation_params:
|
| 147 |
+
unconditional_guidance_scale: 3.5
|
| 148 |
+
ddim_sampling_steps: 200
|
| 149 |
+
n_candidates_per_samples: 3
|
latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=119999.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81874520599c1cdc48f87af9a3d41ce228de409b9a31eae73e2c218819ee7feb
|
| 3 |
+
size 4910952259
|
latent_diffusion/my_configs/audioldm_original/checkpoints/checkpoint-fad-133.00-global_step=79999.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf8938853f65a559040b00d8164b58aa541396d807b5a6e3ba8c8ab9ef5285a9
|
| 3 |
+
size 4910952259
|
latent_diffusion/my_configs/audioldm_original/infer_07-23-08:28_cfg_scale_3.5_ddim_200_n_cand_3/-0xzrMun0Rs.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:325f313d578e4057eab5474cae4744464936581e7b71446bfbb003f96fc343eb
|
| 3 |
+
size 327788
|
testset_data/audiocaps/05OJDYeHLMc.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cdb94d0d33f4b21375b3bef8d2ffd4ea063e4ad1bf08751cceacdb3ae3e7c400
|
| 3 |
+
size 320044
|
testset_data/audiocaps/0Wdh45yt7tY.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f01912fdf5fae71dde63f09749570faf29c16ff8b289e3e23f88a681890cc45
|
| 3 |
+
size 320044
|
testset_data/audiocaps/2RMOegT2Jn8.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:020e2365d24b30c3ef632e9195bbe9dd56b643e6691bd3a6b44572df3bee92b2
|
| 3 |
+
size 320044
|
testset_data/audiocaps/2gvyOxKuQPY.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef302a002971776b76428d76769766ae847f291f1ace33099b9e225f5ada710b
|
| 3 |
+
size 320044
|
testset_data/audiocaps/3IYd8cCmUkQ.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85fb31647ed1e05f2eaf7bdede7f9097ee0cabad07fec6168b6d7154e102a5a6
|
| 3 |
+
size 320044
|
testset_data/audiocaps/3Xoz87_SUdw.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee133da9f25dcb3a20a0299f732a39208be2c6f2546a5b14b5973db9f858b2f6
|
| 3 |
+
size 320044
|
testset_data/audiocaps/4mtfOkzOvBI.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b245df371a41e0949fc0ef1181efaf1438e610d76348aa174303f19eb8a53065
|
| 3 |
+
size 320044
|
testset_data/audiocaps/6uIOGE36tWo.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be77f7f2f24d4fa6b1b32e9b6bf78d3d391ec1ad9a0a3cc65e2f6c57ca497845
|
| 3 |
+
size 320044
|
testset_data/audiocaps/7_Sr2zv1sQc.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3d1b94abf9d28803a5744b60e4de72bbe462aa817261ba8c3ee7a503d06a0db
|
| 3 |
+
size 320044
|
testset_data/audiocaps/8Bu8CkR1xZY.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6f10d6da1e05c7978afa1b028fc755fe1d23d3796263d3878eb3ba83aaa085d
|
| 3 |
+
size 320044
|
testset_data/audiocaps/A3vsvL-Yx0Q.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f00c60d9e5e2b9071e8f2f898adaf78f7d392acf7274bf9b6738fffead31bf27
|
| 3 |
+
size 320044
|
testset_data/audiocaps/AHrUfa2H_5s.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5da7c879adac1e1624eb3483073f493fd0c837eadd497a45561850c5dad71eb5
|
| 3 |
+
size 320044
|
testset_data/audiocaps/B90BOtSOD2Q.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54ef7017a184ee6c893bae9b484361e2c8d4acfabadfccca83ff7398fb5a8a22
|
| 3 |
+
size 320044
|
testset_data/audiocaps/Bl-lCgr5hGY.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4ad3a9721ab468d3abecd3bfb33a04806dd7e83bf3ce60ad8eb1c5b2190722e
|
| 3 |
+
size 320044
|
testset_data/audiocaps/ByOqw8M2U-Q.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9dbbacf98fac39663c9ed5fb0b58302884c0ecb1375cdbc4cb18e6b8fd8e98c
|
| 3 |
+
size 320044
|
testset_data/audiocaps/Cy3HWnwMLyI.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccde8b6db494aafb5eae8bd2a69a6f9b381073b9ff242a89a9169ea249692a6c
|
| 3 |
+
size 320044
|
testset_data/audiocaps/D3f5VIJYR7M.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0bf1d4c2bd7315270f311e409eb8e32e37bf591235d6feda184ad9cba0b5601
|
| 3 |
+
size 320044
|
testset_data/audiocaps/DW3z-ByrfWY.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:149f1afc2bb775a7c99ccb09c8e6e603b2a3ed5dd8c6c93028a9720bb1b4e99f
|
| 3 |
+
size 320044
|
testset_data/audiocaps/EOaQnfDjVyo.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40b92a82880c5c2ec12da32228887e0a721bd460cd3576ba21ff67bb127d4a8b
|
| 3 |
+
size 320044
|
testset_data/audiocaps/Fa1c4qfBqzE.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57a9d87e3781a8d74a97e6d9a28127aec0072c7ce34b9d4739f0e1a41cc28ccb
|
| 3 |
+
size 320044
|
testset_data/audiocaps/HpkPTa1fQDE.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3ae0d62cb8060e0a9101139c96df1867128f79182b35c7edb19b3e57295cdfe
|
| 3 |
+
size 320044
|
testset_data/audiocaps/KpwdlYIdtfs.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0280771fbe19535d4684992f9d3df3bdbfceebb670a7ebcb2e4834e4920b4843
|
| 3 |
+
size 320044
|
testset_data/audiocaps/LAHWV6fZwUk.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64ba52fb3c6fe92f0fd689cae48f5d189a00306ceda745004b2d79db7d70a949
|
| 3 |
+
size 320044
|
testset_data/audiocaps/NSyqj1DXZKg.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4ac4e9613525a966dd9da905529621422489ee17a4d789b1e05efefd8e3e01e
|
| 3 |
+
size 320044
|
testset_data/audiocaps/NzVg-cFQJbE.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bfbd09f9483f395ea7b1a87dc7c3e18242ddf5f262a267ad54cc4241dfbd3db
|
| 3 |
+
size 320044
|
testset_data/audiocaps/TkclVqlyKx4.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9da3ca97bf3fa79478daccf8eb924062eca8af88dc99845c0827d9cbe78fa7fc
|
| 3 |
+
size 320044
|
testset_data/audiocaps/VkXLtUx-RmI.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:640f780ea1c043641462342fef56e9db077b0aaed1a636e9dd61b71c0c83d53f
|
| 3 |
+
size 320044
|
testset_data/audiocaps/XXBVsNt2Qr8.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5100bfe43151f773135294c6fa17a77cc92a1f901cb84c1c5be75358e7618645
|
| 3 |
+
size 320044
|
testset_data/audiocaps/YwUa3OS92ZQ.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:031d0952cec730868e7ad3c290005ed0fe55470cdb3e029545768b4605fb88c7
|
| 3 |
+
size 320044
|
testset_data/audiocaps/_gWEpDgPAho.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3bec87ac5b40c2d9ebf26118b12d4f90a4f48e3766608ba4ed27f05c63e31c7b
|
| 3 |
+
size 320044
|
testset_data/audiocaps/_u2cNlW5DxQ.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2bf139665d39580e62359d131938a1f260219a3cc0e1a279f0c107e588fb145
|
| 3 |
+
size 320044
|
testset_data/audiocaps/bZuXMxR2S4U.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fee3ac0c043fd0e9245b202a92cab10d3b7d3ddb585e9f1e7640e34c93d500d3
|
| 3 |
+
size 320044
|
testset_data/audiocaps/bl-eQ8XD5CY.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f569b41c49db3364ab5284f6d20c92046afb386501df08fc865e548e58a5cb6e
|
| 3 |
+
size 320044
|
testset_data/audiocaps/dPCj4WhTZ3c.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f83cbd8264ded5bb2d573d73d318e6fb1a36988b288e96a4ef82312ea8b5b3e9
|
| 3 |
+
size 320044
|
testset_data/audiocaps/dTVkk4GSmNA.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c79ad7f36214e686c35ae3d234e865862190ed6e76c8d0eec996ff5efe64939f
|
| 3 |
+
size 320044
|
testset_data/audiocaps/fJ6ZeWYfLjA.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3954307043dd101f3a270cccdfd59a960a4a7c3e5df478881ca0375049a1d87d
|
| 3 |
+
size 320044
|
testset_data/audiocaps/gW33LYEvoaw.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c12539607e235125526aef365e551a45b8b5212f27f9988b6f9617a20c891039
|
| 3 |
+
size 320044
|
testset_data/audiocaps/hM88FG1_D5Q.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0babba6248fc32bb99d85fd90e0f3beb3b84fa8d86f45e7f5cf1e28b5a35f3c
|
| 3 |
+
size 320044
|
testset_data/audiocaps/jqiD3VeM_hY.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8473f523e5cf35621318aa382c47ad5863df18d9409e81c859440e6a928793e2
|
| 3 |
+
size 320044
|
testset_data/audiocaps/mFcHGbnNtSQ.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:644b53e6bdf01ef750911911acca3b492f04a966418c93072695864a06aefdb1
|
| 3 |
+
size 320044
|
testset_data/audiocaps/oxZjkE984Uo.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1c619b9b340a904e7f3026be08a0741a74c075282aaa7cedf29f6a448a890ae
|
| 3 |
+
size 320044
|
testset_data/audiocaps/s72505MIhz8.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5eea74f52099773a9db2444cae186a025d5950c729b8d4cc604ba18fd0f882de
|
| 3 |
+
size 320044
|
testset_data/audiocaps/xBDcJKb-9vk.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0d5718a1c5f233d0639a098e008f373724710d856723c3a2b3e5ceeb38628ca
|
| 3 |
+
size 320044
|
testset_data/audiocaps/xmaVWayBvaY.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:873f54931176cc29d03ca870612ebb6ba1cb55549e3c07cfc8cd30a45d9fe1d8
|
| 3 |
+
size 320044
|
testset_data/audiocaps/y6iMm7Pltq0.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a3092ec17275e2a16280d09d6cc5e27c8c0571f366abdb491ff7844941cc208
|
| 3 |
+
size 320044
|