IbrahimSalah committed on
Commit
5adc53a
·
verified ·
1 Parent(s): c36fd63

Upload 2 files

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. F5TTS_Base.yaml +52 -0
  3. reference.wav +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ reference.wav filter=lfs diff=lfs merge=lfs -text
F5TTS_Base.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
4
+
5
+ datasets:
6
+ name: Emilia_ZH_EN # dataset name
7
+ batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
8
+ batch_size_type: frame # frame | sample
9
+ max_samples: 64 # max sequences per batch when using frame-wise batch_size; we set 32 for small models, 64 for base models
10
+ num_workers: 16
11
+
12
+ optim:
13
+ epochs: 11
14
+ learning_rate: 7.5e-5
15
+ num_warmup_updates: 20000 # warmup updates
16
+ grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
17
+ max_grad_norm: 1.0 # gradient clipping
18
+ bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
19
+
20
+ model:
21
+ name: F5TTS_Base # model name
22
+ tokenizer: pinyin # tokenizer type
23
+ tokenizer_path: null # if using the 'custom' tokenizer, the path to the vocab file you want to use (should be vocab.txt)
24
+ backbone: DiT
25
+ arch:
26
+ dim: 1024
27
+ depth: 22
28
+ heads: 16
29
+ ff_mult: 2
30
+ text_dim: 512
31
+ text_mask_padding: False
32
+ conv_layers: 4
33
+ pe_attn_head: 1
34
+ checkpoint_activations: False # recompute activations to save memory at the cost of extra compute
35
+ mel_spec:
36
+ target_sample_rate: 24000
37
+ n_mel_channels: 100
38
+ hop_length: 256
39
+ win_length: 1024
40
+ n_fft: 1024
41
+ mel_spec_type: vocos # vocos | bigvgan
42
+ vocoder:
43
+ is_local: False # use local offline ckpt or not
44
+ local_path: null # local vocoder path
45
+
46
+ ckpts:
47
+ logger: wandb # wandb | tensorboard | null
48
+ log_samples: True # infer a random sample each time a checkpoint is saved; WIP, failures on extra-long samples are expected
49
+ save_per_updates: 50000 # save checkpoint per updates
50
+ keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
51
+ last_per_updates: 5000 # save last checkpoint per updates
52
+ save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
reference.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6db1e038c67df75cdde9ad1e43ba05f660eebc9346a30617d9b2f3892a5b201
3
+ size 1058478