taresh18 commited on
Commit
2dd6e38
·
verified ·
1 Parent(s): f54b507

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +48 -0
config.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
output_dir: outputs
resume: false  # resume from last checkpoint

# model
latent_dim: 64  # dimensionality of latent vector
codebook_size: 1024  # number of entries per codebook (K)
num_rvq_levels: 8  # number of residual quantization levels
codebook_dim: 8  # codebook embedding dim

grad_accum_steps: 1  # gradient accumulation steps
batch_size: 96
num_epochs: 50
lr: 1.0e-4  # initial learning rate
lr_min: 1.0e-5  # minimum learning rate at end of cosine schedule
adam_beta1: 0.8
adam_beta2: 0.99
beta: 0.25  # commitment loss weight
use_amp: true  # mixed precision training

# dataset
librispeech_url: train-clean-100  # LibriSpeech split (train-clean-100 = ~100 hours, ~6GB)
data_dir: /workspace/data
num_workers: 8  # dataloader workers
sample_rate: 16000  # LibriSpeech native sample rate
chunk_size: 16384  # ~1 sec segment, must be divisible by 128 (encoder downsample factor)
max_chunks: null  # how many samples to consider, null to use complete dataset
streaming: true  # true=load from disk on-the-fly, false=load .pt shards into RAM

# loss functions
loss_type: mse+stft+mel  # mse, stft, mel, mse+stft, mse+mel, mse+stft+mel
lambda_mse: 0.1  # small
lambda_stft: 1.0  # multi-resolution STFT loss
lambda_mel: 15.0  # mel loss weight

# eval
num_eval_samples: 3  # number of fixed samples to reconstruct on each new best

# compile
compile: true  # enable torch.compile
compile_mode: default  # default, max-autotune (reduce-overhead conflicts with weight_norm)

# profiling
profile: false  # profile first 5 batches

# logging
use_wandb: true  # enable/disable wandb logging
wandb_project: audio-codec  # wandb project name
log_interval: 50  # log to wandb every N batches