niobures committed on
Commit
39a0cc7
·
verified ·
1 Parent(s): 3043c68

Seed-VC (models)

Browse files
Files changed (35) hide show
  1. models/Seed-VC (memescreamer)/.gitattributes +35 -0
  2. models/Seed-VC (memescreamer)/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ema.pth +3 -0
  3. models/Seed-VC (memescreamer)/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema.pth +3 -0
  4. models/Seed-VC (memescreamer)/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema_v2.pth +3 -0
  5. models/Seed-VC (memescreamer)/DiT_seed_v2_uvit_whisper_small_wavenet_bigvgan_pruned.pth +3 -0
  6. models/Seed-VC (memescreamer)/DiT_uvit_tat_xlsr_ema.pth +3 -0
  7. models/Seed-VC (memescreamer)/README.md +18 -0
  8. models/Seed-VC (memescreamer)/config_dit_mel_seed_uvit_whisper_base_f0_44k.yml +98 -0
  9. models/Seed-VC (memescreamer)/config_dit_mel_seed_uvit_whisper_small_wavenet.yml +91 -0
  10. models/Seed-VC (memescreamer)/config_dit_mel_seed_uvit_xlsr_tiny.yml +82 -0
  11. models/Seed-VC (memescreamer)/config_dit_mel_seed_wavenet.yml +79 -0
  12. models/Seed-VC (memescreamer)/hifigan.yml +25 -0
  13. models/Seed-VC (memescreamer)/hift.pt +3 -0
  14. models/Seed-VC (memescreamer)/rmvpe/rmvpe.pt +3 -0
  15. models/Seed-VC (memescreamer)/se_db.pt +3 -0
  16. models/Seed-VC (memescreamer)/source.txt +1 -0
  17. models/Seed-VC (memescreamer)/v2/ar_base.pth +3 -0
  18. models/Seed-VC (memescreamer)/v2/cfm_small.pth +3 -0
  19. models/malayalam-seedvc-v2/.gitattributes +35 -0
  20. models/malayalam-seedvc-v2/AR_epoch_00666_step_50000.pth +3 -0
  21. models/malayalam-seedvc-v2/CFM_epoch_00666_step_50000.pth +3 -0
  22. models/malayalam-seedvc-v2/languages.txt +1 -0
  23. models/malayalam-seedvc-v2/source.txt +1 -0
  24. models/malayalam-seedvc-v2/vc_wrapper.yaml +105 -0
  25. models/polyphony-seed-vc/.gitattributes +35 -0
  26. models/polyphony-seed-vc/.hydra/config.yaml +100 -0
  27. models/polyphony-seed-vc/.hydra/hydra.yaml +155 -0
  28. models/polyphony-seed-vc/.hydra/overrides.yaml +1 -0
  29. models/polyphony-seed-vc/DiT_epoch_00052_step_09000.pth +3 -0
  30. models/polyphony-seed-vc/DiT_epoch_00055_step_09500.pth +3 -0
  31. models/polyphony-seed-vc/events.out.tfevents.1774949625.85bdaf5299a2.4433.0 +3 -0
  32. models/polyphony-seed-vc/ft_model.pth +3 -0
  33. models/polyphony-seed-vc/source.txt +1 -0
  34. models/polyphony-seed-vc/train.log +81 -0
  35. models/seed-vc-heb/languages.txt +1 -0
models/Seed-VC (memescreamer)/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/Seed-VC (memescreamer)/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ema.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4145da6704ca0950c2ef6312c1cd0c15c370b13613faed2b4a9d9d71bb4ed036
3
+ size 820863652
models/Seed-VC (memescreamer)/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883d19be9fe9c4bb2dce30d545cd77e5ed2b6326c7ed1c4d9a7ce8e555b163f9
3
+ size 820864573
models/Seed-VC (memescreamer)/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema_v2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42aef93ffe65857c840d270252fa040f7ba04514945ec460f3ac1ac2a96de684
3
+ size 820865494
models/Seed-VC (memescreamer)/DiT_seed_v2_uvit_whisper_small_wavenet_bigvgan_pruned.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ec8841b20bb46df9f7e8e570a6946a4b87b940133c7f0e778487ff33841f720
3
+ size 440312082
models/Seed-VC (memescreamer)/DiT_uvit_tat_xlsr_ema.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c853ea578b409f625f961bcb15d5cff1f8ef9a75f3209ec21d9b7c73ab422e88
3
+ size 142112203
models/Seed-VC (memescreamer)/README.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - mirror
4
+ - raidio-bot
5
+ ---
6
+
7
+ # Mirror of Plachta/Seed-VC
8
+
9
+ This is a pinned mirror of [Plachta/Seed-VC](https://huggingface.co/Plachta/Seed-VC).
10
+
11
+ | Field | Value |
12
+ |-------|-------|
13
+ | Upstream | [Plachta/Seed-VC](https://huggingface.co/Plachta/Seed-VC) |
14
+ | Revision | `257283f9f41585055e8f858fba4fd044e5caed6e` |
15
+ | Mirror org | [memescreamer](https://huggingface.co/memescreamer) |
16
+
17
+ This mirror exists to ensure availability of pinned model weights
18
+ regardless of upstream changes.
models/Seed-VC (memescreamer)/config_dit_mel_seed_uvit_whisper_base_f0_44k.yml ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: "./runs/run_dit_mel_seed_uvit_whisper_base_f0_44k"
2
+ save_freq: 1
3
+ log_interval: 10
4
+ save_interval: 1000
5
+ device: "cuda"
6
+ epochs: 1000 # number of epochs for first stage training (pre-training)
7
+ batch_size: 1
8
+ batch_length: 100 # maximum duration of audio in a batch (in seconds)
9
+ max_len: 80 # maximum number of frames
10
+ pretrained_model: ""
11
+ pretrained_encoder: ""
12
+ load_only_params: False # set to true if you do not want to load epoch numbers and optimizer parameters
13
+
14
+ preprocess_params:
15
+ sr: 44100
16
+ spect_params:
17
+ n_fft: 2048
18
+ win_length: 2048
19
+ hop_length: 512
20
+ n_mels: 128
21
+ fmin: 0
22
+ fmax: "None"
23
+
24
+ model_params:
25
+ dit_type: "DiT" # uDiT or DiT
26
+ reg_loss_type: "l1" # l1 or l2
27
+
28
+ timbre_shifter:
29
+ se_db_path: "./modules/openvoice/checkpoints_v2/converter/se_db.pt"
30
+ ckpt_path: './modules/openvoice/checkpoints_v2/converter'
31
+
32
+ vocoder:
33
+ type: "bigvgan"
34
+ name: "nvidia/bigvgan_v2_44khz_128band_512x"
35
+
36
+ speech_tokenizer:
37
+ type: 'whisper'
38
+ name: "openai/whisper-small"
39
+
40
+ style_encoder:
41
+ dim: 192
42
+ campplus_path: "campplus_cn_common.bin"
43
+
44
+ DAC:
45
+ encoder_dim: 64
46
+ encoder_rates: [2, 5, 5, 6]
47
+ decoder_dim: 1536
48
+ decoder_rates: [ 6, 5, 5, 2 ]
49
+ sr: 24000
50
+
51
+ length_regulator:
52
+ channels: 768
53
+ is_discrete: false
54
+ in_channels: 768
55
+ content_codebook_size: 2048
56
+ sampling_ratios: [1, 1, 1, 1]
57
+ vector_quantize: false
58
+ n_codebooks: 1
59
+ quantizer_dropout: 0.0
60
+ f0_condition: true
61
+ n_f0_bins: 256
62
+
63
+ DiT:
64
+ hidden_dim: 768
65
+ num_heads: 12
66
+ depth: 17
67
+ class_dropout_prob: 0.1
68
+ block_size: 8192
69
+ in_channels: 128
70
+ style_condition: true
71
+ final_layer_type: 'mlp'
72
+ target: 'mel' # mel or codec
73
+ content_dim: 768
74
+ content_codebook_size: 1024
75
+ content_type: 'discrete'
76
+ f0_condition: true
77
+ n_f0_bins: 256
78
+ content_codebooks: 1
79
+ is_causal: false
80
+ long_skip_connection: false
81
+ zero_prompt_speech_token: false # for prompt component, do not input corresponding speech token
82
+ time_as_token: false
83
+ style_as_token: false
84
+ uvit_skip_connection: true
85
+ add_resblock_in_transformer: false
86
+
87
+ wavenet:
88
+ hidden_dim: 768
89
+ num_layers: 8
90
+ kernel_size: 5
91
+ dilation_rate: 1
92
+ p_dropout: 0.2
93
+ style_condition: true
94
+
95
+ loss_params:
96
+ base_lr: 0.0001
97
+ lambda_mel: 45
98
+ lambda_kl: 1.0
models/Seed-VC (memescreamer)/config_dit_mel_seed_uvit_whisper_small_wavenet.yml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: "./runs/run_dit_mel_seed_uvit_whisper_small_wavenet"
2
+ save_freq: 1
3
+ log_interval: 10
4
+ save_interval: 1000
5
+ device: "cuda"
6
+ epochs: 1000 # number of epochs for first stage training (pre-training)
7
+ batch_size: 2
8
+ batch_length: 100 # maximum duration of audio in a batch (in seconds)
9
+ max_len: 80 # maximum number of frames
10
+ pretrained_model: ""
11
+ pretrained_encoder: "./temp_ckpt.pth"
12
+ load_only_params: False # set to true if you do not want to load epoch numbers and optimizer parameters
13
+
14
+ preprocess_params:
15
+ sr: 22050
16
+ spect_params:
17
+ n_fft: 1024
18
+ win_length: 1024
19
+ hop_length: 256
20
+ n_mels: 80
21
+ fmin: 0
22
+ fmax: "None"
23
+
24
+ model_params:
25
+ dit_type: "DiT" # uDiT or DiT
26
+ reg_loss_type: "l1" # l1 or l2
27
+
28
+ timbre_shifter:
29
+ se_db_path: "./modules/openvoice/checkpoints_v2/converter/se_db.pt"
30
+ ckpt_path: './modules/openvoice/checkpoints_v2/converter'
31
+
32
+ speech_tokenizer:
33
+ type: 'whisper'
34
+ name: "openai/whisper-small"
35
+
36
+ style_encoder:
37
+ dim: 192
38
+ campplus_path: "campplus_cn_common.bin"
39
+
40
+ vocoder:
41
+ type: "bigvgan"
42
+ name: "nvidia/bigvgan_v2_22khz_80band_256x"
43
+
44
+ length_regulator:
45
+ channels: 512
46
+ is_discrete: false
47
+ in_channels: 768
48
+ content_codebook_size: 2048
49
+ sampling_ratios: [1, 1, 1, 1]
50
+ vector_quantize: false
51
+ n_codebooks: 1
52
+ quantizer_dropout: 0.0
53
+ f0_condition: false
54
+ n_f0_bins: 512
55
+
56
+ DiT:
57
+ hidden_dim: 512
58
+ num_heads: 8
59
+ depth: 13
60
+ class_dropout_prob: 0.1
61
+ block_size: 8192
62
+ in_channels: 80
63
+ style_condition: true
64
+ final_layer_type: 'wavenet'
65
+ target: 'mel' # mel or codec
66
+ content_dim: 512
67
+ content_codebook_size: 1024
68
+ content_type: 'discrete'
69
+ f0_condition: false
70
+ n_f0_bins: 512
71
+ content_codebooks: 1
72
+ is_causal: false
73
+ long_skip_connection: true
74
+ zero_prompt_speech_token: false # for prompt component, do not input corresponding speech token
75
+ time_as_token: false
76
+ style_as_token: false
77
+ uvit_skip_connection: true
78
+ add_resblock_in_transformer: false
79
+
80
+ wavenet:
81
+ hidden_dim: 512
82
+ num_layers: 8
83
+ kernel_size: 5
84
+ dilation_rate: 1
85
+ p_dropout: 0.2
86
+ style_condition: true
87
+
88
+ loss_params:
89
+ base_lr: 0.0001
90
+ lambda_mel: 45
91
+ lambda_kl: 1.0
models/Seed-VC (memescreamer)/config_dit_mel_seed_uvit_xlsr_tiny.yml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: "runs/run_mel_seed_uvit_xlsr_tiny"
2
+ save_freq: 1
3
+ log_interval: 10
4
+ save_interval: 500
5
+ device: "cuda"
6
+ epochs: 1000 # number of epochs for first stage training (pre-training)
7
+ batch_size: 2
8
+ batch_length: 100 # maximum duration of audio in a batch (in seconds)
9
+ max_len: 80 # maximum number of frames
10
+ pretrained_model: "DiT_uvit_tat_xlsr_ema.pth"
11
+ pretrained_encoder: ""
12
+ load_only_params: False # set to true if you do not want to load epoch numbers and optimizer parameters
13
+
14
+ preprocess_params:
15
+ sr: 22050
16
+ spect_params:
17
+ n_fft: 1024
18
+ win_length: 1024
19
+ hop_length: 256
20
+ n_mels: 80
21
+ fmin: 0
22
+ fmax: 8000
23
+
24
+ model_params:
25
+ dit_type: "DiT" # uDiT or DiT
26
+ reg_loss_type: "l1" # l1 or l2
27
+ diffusion_type: "flow"
28
+
29
+ timbre_shifter:
30
+ se_db_path: "./modules/openvoice/checkpoints_v2/converter/se_db.pt"
31
+ ckpt_path: './modules/openvoice/checkpoints_v2/converter'
32
+
33
+ vocoder:
34
+ type: "hifigan"
35
+
36
+ speech_tokenizer:
37
+ type: 'xlsr'
38
+ output_layer: 12
39
+ name: 'facebook/wav2vec2-xls-r-300m'
40
+
41
+ style_encoder:
42
+ dim: 192
43
+ campplus_path: "campplus_cn_common.bin"
44
+
45
+ length_regulator:
46
+ channels: 384
47
+ is_discrete: false
48
+ in_channels: 1024
49
+ content_codebook_size: 1024
50
+ sampling_ratios: [1, 1, 1, 1]
51
+ vector_quantize: false
52
+ n_codebooks: 2
53
+ quantizer_dropout: 0.0
54
+ f0_condition: false
55
+ n_f0_bins: 512
56
+
57
+ DiT:
58
+ hidden_dim: 384
59
+ num_heads: 6
60
+ depth: 9
61
+ class_dropout_prob: 0.1
62
+ block_size: 8192
63
+ in_channels: 80
64
+ style_condition: true
65
+ final_layer_type: 'mlp'
66
+ target: 'mel' # mel or betavae
67
+ content_dim: 384
68
+ content_codebook_size: 1024
69
+ content_type: 'discrete'
70
+ f0_condition: false
71
+ n_f0_bins: 512
72
+ content_codebooks: 1
73
+ is_causal: false
74
+ long_skip_connection: false
75
+ zero_prompt_speech_token: false # for prompt component, do not input corresponding speech token
76
+ time_as_token: true
77
+ style_as_token: true
78
+ uvit_skip_connection: true
79
+ add_resblock_in_transformer: false
80
+
81
+ loss_params:
82
+ base_lr: 0.0001
models/Seed-VC (memescreamer)/config_dit_mel_seed_wavenet.yml ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: "./runs/run_dit_mel_seed"
2
+ save_freq: 1
3
+ log_interval: 10
4
+ save_interval: 1000
5
+ device: "cuda"
6
+ epochs: 1000 # number of epochs for first stage training (pre-training)
7
+ batch_size: 4
8
+ batch_length: 100 # maximum duration of audio in a batch (in seconds)
9
+ max_len: 80 # maximum number of frames
10
+ pretrained_model: ""
11
+ pretrained_encoder: ""
12
+ load_only_params: False # set to true if you do not want to load epoch numbers and optimizer parameters
13
+
14
+ F0_path: "modules/JDC/bst.t7"
15
+
16
+ preprocess_params:
17
+ sr: 22050
18
+ spect_params:
19
+ n_fft: 1024
20
+ win_length: 1024
21
+ hop_length: 256
22
+ n_mels: 80
23
+
24
+ model_params:
25
+ dit_type: "DiT" # uDiT or DiT
26
+ reg_loss_type: "l2" # l1 or l2
27
+
28
+ speech_tokenizer:
29
+ path: "speech_tokenizer_v1.onnx"
30
+
31
+ style_encoder:
32
+ dim: 192
33
+ campplus_path: "campplus_cn_common.bin"
34
+
35
+ DAC:
36
+ encoder_dim: 64
37
+ encoder_rates: [2, 5, 5, 6]
38
+ decoder_dim: 1536
39
+ decoder_rates: [ 6, 5, 5, 2 ]
40
+ sr: 24000
41
+
42
+ length_regulator:
43
+ channels: 768
44
+ is_discrete: true
45
+ content_codebook_size: 4096
46
+ in_frame_rate: 50
47
+ out_frame_rate: 80
48
+ sampling_ratios: [1, 1, 1, 1]
49
+
50
+ DiT:
51
+ hidden_dim: 768
52
+ num_heads: 12
53
+ depth: 12
54
+ class_dropout_prob: 0.1
55
+ block_size: 4096
56
+ in_channels: 80
57
+ style_condition: true
58
+ final_layer_type: 'wavenet'
59
+ target: 'mel' # mel or codec
60
+ content_dim: 768
61
+ content_codebook_size: 1024
62
+ content_type: 'discrete'
63
+ f0_condition: false
64
+ n_f0_bins: 512
65
+ content_codebooks: 1
66
+ is_causal: false
67
+ long_skip_connection: true
68
+ zero_prompt_speech_token: false # for prompt component, do not input corresponding speech token
69
+
70
+ wavenet:
71
+ hidden_dim: 768
72
+ num_layers: 8
73
+ kernel_size: 5
74
+ dilation_rate: 1
75
+ p_dropout: 0.2
76
+ style_condition: true
77
+
78
+ loss_params:
79
+ base_lr: 0.0001
models/Seed-VC (memescreamer)/hifigan.yml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hift:
2
+ in_channels: 80
3
+ base_channels: 512
4
+ nb_harmonics: 8
5
+ sampling_rate: 22050
6
+ nsf_alpha: 0.1
7
+ nsf_sigma: 0.003
8
+ nsf_voiced_threshold: 10
9
+ upsample_rates: [8, 8]
10
+ upsample_kernel_sizes: [16, 16]
11
+ istft_params:
12
+ n_fft: 16
13
+ hop_len: 4
14
+ resblock_kernel_sizes: [3, 7, 11]
15
+ resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
16
+ source_resblock_kernel_sizes: [7, 11]
17
+ source_resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5]]
18
+ lrelu_slope: 0.1
19
+ audio_limit: 0.99
20
+ f0_predictor:
21
+ num_class: 1
22
+ in_channels: 80
23
+ cond_channels: 512
24
+
25
+ pretrained_model_path: "hift.pt"
models/Seed-VC (memescreamer)/hift.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91e679b6ca1eff71187ffb4f3ab0444935594cdcc20a9bd12afad111ef8d6012
3
+ size 81896716
models/Seed-VC (memescreamer)/rmvpe/rmvpe.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d62215f4306e3ca278246188607209f09af3dc77ed4232efdd069798c4ec193
3
+ size 181184272
models/Seed-VC (memescreamer)/se_db.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a76e1ad4fac609cfa1a5198bf789427c5e7822d656fab1ffed6cd80fc0d381b7
3
+ size 102405286
models/Seed-VC (memescreamer)/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/memescreamer/Seed-VC
models/Seed-VC (memescreamer)/v2/ar_base.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42e2afabf97e5d856580af1f81a9705423b4fb21bb9475cece9925b62355cc5c
3
+ size 358846893
models/Seed-VC (memescreamer)/v2/cfm_small.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73ae18615dc8cb01710086629973198226dedc63ebeaab0aee17863d767b3474
3
+ size 353349694
models/malayalam-seedvc-v2/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/malayalam-seedvc-v2/AR_epoch_00666_step_50000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7e720aebeb993d4562a05857a547db24a1062fbd8260f06819fa317f4f1c05
3
+ size 1556030166
models/malayalam-seedvc-v2/CFM_epoch_00666_step_50000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a3d743335e5cb034ea742743456712fc3936e97c04b7475a2d5311cd12f71e6
3
+ size 1574488384
models/malayalam-seedvc-v2/languages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Malayalam
models/malayalam-seedvc-v2/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/Bajiyo/malayalam-seedvc-v2
models/malayalam-seedvc-v2/vc_wrapper.yaml ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: modules.v2.vc_wrapper.VoiceConversionWrapper
2
+ sr: 22050
3
+ hop_size: 256
4
+ mel_fn:
5
+ _target_: modules.audio.mel_spectrogram
6
+ _partial_: true
7
+ n_fft: 1024
8
+ win_size: 1024
9
+ hop_size: 256
10
+ num_mels: 80
11
+ sampling_rate: 22050
12
+ fmin: 0
13
+ fmax: null
14
+ center: False
15
+ cfm:
16
+ _target_: modules.v2.cfm.CFM
17
+ estimator:
18
+ _target_: modules.v2.dit_wrapper.DiT
19
+ time_as_token: true
20
+ style_as_token: true
21
+ uvit_skip_connection: false
22
+ block_size: 8192
23
+ depth: 13
24
+ num_heads: 8
25
+ hidden_dim: 512
26
+ in_channels: 80
27
+ content_dim: 512
28
+ style_encoder_dim: 192
29
+ class_dropout_prob: 0.1
30
+ dropout_rate: 0.0
31
+ attn_dropout_rate: 0.0
32
+ cfm_length_regulator:
33
+ _target_: modules.v2.length_regulator.InterpolateRegulator
34
+ channels: 512
35
+ is_discrete: true
36
+ codebook_size: 2048
37
+ sampling_ratios: [ 1, 1, 1, 1 ]
38
+ f0_condition: false
39
+ ar:
40
+ _target_: modules.v2.ar.NaiveWrapper
41
+ model:
42
+ _target_: modules.v2.ar.NaiveTransformer
43
+ config:
44
+ _target_: modules.v2.ar.NaiveModelArgs
45
+ dropout: 0.0
46
+ rope_base: 10000.0
47
+ dim: 768
48
+ head_dim: 64
49
+ n_local_heads: 2
50
+ intermediate_size: 2304
51
+ n_head: 12
52
+ n_layer: 12
53
+ vocab_size: 2049 # 1 + 1 for eos
54
+ ar_length_regulator:
55
+ _target_: modules.v2.length_regulator.InterpolateRegulator
56
+ channels: 768
57
+ is_discrete: true
58
+ codebook_size: 32
59
+ sampling_ratios: [ ]
60
+ f0_condition: false
61
+ style_encoder:
62
+ _target_: modules.campplus.DTDNN.CAMPPlus
63
+ feat_dim: 80
64
+ embedding_size: 192
65
+ content_extractor_narrow:
66
+ _target_: modules.astral_quantization.default_model.AstralQuantizer
67
+ tokenizer_name: "openai/whisper-small"
68
+ ssl_model_name: "facebook/hubert-large-ll60k"
69
+ ssl_output_layer: 18
70
+ skip_ssl: true
71
+ encoder: &bottleneck_encoder
72
+ _target_: modules.astral_quantization.convnext.ConvNeXtV2Stage
73
+ dim: 512
74
+ num_blocks: 12
75
+ intermediate_dim: 1536
76
+ dilation: 1
77
+ input_dim: 1024
78
+ quantizer:
79
+ _target_: modules.astral_quantization.bsq.BinarySphericalQuantize
80
+ codebook_size: 32 # codebook size, must be a power of 2
81
+ dim: 512
82
+ entropy_loss_weight: 0.1
83
+ diversity_gamma: 1.0
84
+ spherical: True
85
+ enable_entropy_loss: True
86
+ soft_entropy_loss: True
87
+ content_extractor_wide:
88
+ _target_: modules.astral_quantization.default_model.AstralQuantizer
89
+ tokenizer_name: "openai/whisper-small"
90
+ ssl_model_name: "facebook/hubert-large-ll60k"
91
+ ssl_output_layer: 18
92
+ encoder: *bottleneck_encoder
93
+ quantizer:
94
+ _target_: modules.astral_quantization.bsq.BinarySphericalQuantize
95
+ codebook_size: 2048 # codebook size, must be a power of 2
96
+ dim: 512
97
+ entropy_loss_weight: 0.1
98
+ diversity_gamma: 1.0
99
+ spherical: True
100
+ enable_entropy_loss: True
101
+ soft_entropy_loss: True
102
+ vocoder:
103
+ _target_: modules.bigvgan.bigvgan.BigVGAN.from_pretrained
104
+ pretrained_model_name_or_path: "nvidia/bigvgan_v2_22khz_80band_256x"
105
+ use_cuda_kernel: false
models/polyphony-seed-vc/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/polyphony-seed-vc/.hydra/config.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pretrained_model: DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema.pth
2
+ log_interval: 10
3
+ preprocess_params:
4
+ sr: 44100
5
+ spect_params:
6
+ n_fft: 2048
7
+ win_length: 2048
8
+ hop_length: 512
9
+ n_mels: 128
10
+ fmin: 0
11
+ fmax: None
12
+ model_params:
13
+ dit_type: DiT
14
+ reg_loss_type: l1
15
+ timbre_shifter:
16
+ se_db_path: ./modules/openvoice/checkpoints_v2/converter/se_db.pt
17
+ ckpt_path: ./modules/openvoice/checkpoints_v2/converter
18
+ vocoder:
19
+ type: bigvgan
20
+ name: nvidia/bigvgan_v2_44khz_128band_512x
21
+ speech_tokenizer:
22
+ type: whisper
23
+ name: openai/whisper-small
24
+ style_encoder:
25
+ dim: 192
26
+ campplus_path: campplus_cn_common.bin
27
+ DAC:
28
+ encoder_dim: 64
29
+ encoder_rates:
30
+ - 2
31
+ - 5
32
+ - 5
33
+ - 6
34
+ decoder_dim: 1536
35
+ decoder_rates:
36
+ - 6
37
+ - 5
38
+ - 5
39
+ - 2
40
+ sr: 24000
41
+ length_regulator:
42
+ channels: 768
43
+ is_discrete: false
44
+ in_channels: 768
45
+ content_codebook_size: 2048
46
+ sampling_ratios:
47
+ - 1
48
+ - 1
49
+ - 1
50
+ - 1
51
+ vector_quantize: false
52
+ n_codebooks: 1
53
+ quantizer_dropout: 0.0
54
+ f0_condition: true
55
+ n_f0_bins: 256
56
+ DiT:
57
+ hidden_dim: 768
58
+ num_heads: 12
59
+ depth: 17
60
+ class_dropout_prob: 0.1
61
+ block_size: 8192
62
+ in_channels: 128
63
+ style_condition: true
64
+ final_layer_type: mlp
65
+ target: mel
66
+ content_dim: 768
67
+ content_codebook_size: 1024
68
+ content_type: discrete
69
+ f0_condition: true
70
+ n_f0_bins: 256
71
+ content_codebooks: 1
72
+ is_causal: false
73
+ long_skip_connection: false
74
+ zero_prompt_speech_token: false
75
+ time_as_token: false
76
+ style_as_token: false
77
+ uvit_skip_connection: true
78
+ add_resblock_in_transformer: false
79
+ wavenet:
80
+ hidden_dim: 768
81
+ num_layers: 8
82
+ kernel_size: 5
83
+ dilation_rate: 1
84
+ p_dropout: 0.2
85
+ style_condition: true
86
+ loss_params:
87
+ base_lr: 0.0001
88
+ lambda_mel: 45
89
+ lambda_kl: 1.0
90
+ run_name: my_run-hydra-nfdsnflksdjflkds
91
+ batch_size: 2
92
+ max_steps: 10000
93
+ max_epochs: 1000
94
+ save_every: 500
95
+ eval_every: 1
96
+ num_workers: 0
97
+ require_features: true
98
+ pretrained_ckpt: null
99
+ tensorboard_logging: true
100
+ mlflow_logging: false
models/polyphony-seed-vc/.hydra/hydra.yaml ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: runs/${run_name}_${now:%Y-%m-%d_%H-%M-%S}
4
+ sweep:
5
+ dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task: []
115
+ job:
116
+ name: train
117
+ chdir: false
118
+ override_dirname: ''
119
+ id: ???
120
+ num: ???
121
+ config_name: config
122
+ env_set: {}
123
+ env_copy: []
124
+ config:
125
+ override_dirname:
126
+ kv_sep: '='
127
+ item_sep: ','
128
+ exclude_keys: []
129
+ runtime:
130
+ version: 1.3.2
131
+ version_base: '1.3'
132
+ cwd: /workspace/polyphony-seed-vc
133
+ config_sources:
134
+ - path: hydra.conf
135
+ schema: pkg
136
+ provider: hydra
137
+ - path: /workspace/polyphony-seed-vc/configs
138
+ schema: file
139
+ provider: main
140
+ - path: ''
141
+ schema: structured
142
+ provider: schema
143
+ output_dir: /workspace/polyphony-seed-vc/runs/my_run-hydra-nfdsnflksdjflkds_2026-03-31_09-33-42
144
+ choices:
145
+ model: uvit_whisper_44k
146
+ hydra/env: default
147
+ hydra/callbacks: null
148
+ hydra/job_logging: default
149
+ hydra/hydra_logging: default
150
+ hydra/hydra_help: default
151
+ hydra/help: default
152
+ hydra/sweeper: basic
153
+ hydra/launcher: basic
154
+ hydra/output: default
155
+ verbose: false
models/polyphony-seed-vc/.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ []
models/polyphony-seed-vc/DiT_epoch_00052_step_09000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c45007f399556355beb653ebc46952947c71cfef26ff5f1402e2dcf1dcd916a
3
+ size 2246796075
models/polyphony-seed-vc/DiT_epoch_00055_step_09500.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cbccbbb94c152b16394fd2ef52f65e2179629734eb1341cd7fa4da5e30f6a98
3
+ size 2246796075
models/polyphony-seed-vc/events.out.tfevents.1774949625.85bdaf5299a2.4433.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5a20464a3e2876dec659afbed2698613464cd75fd12efd9831c375eee92ac68
3
+ size 51905
models/polyphony-seed-vc/ft_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c03d59f289a60e95405f0a3ba839882fc60f7c61b323d77ade48d0bdc8cb0d1
3
+ size 785751672
models/polyphony-seed-vc/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/MaksymHalych/polyphony-seed-vc
models/polyphony-seed-vc/train.log ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2026-03-31 09:33:45,418][__main__][INFO] - Loaded HF checkpoint from ./checkpoints/models--Plachta--Seed-VC/snapshots/257283f9f41585055e8f858fba4fd044e5caed6e/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema.pth
2
+ [2026-03-31 09:34:45,467][__main__][INFO] - Eval loss at epoch 1: 0.5503
3
+ [2026-03-31 09:35:43,500][__main__][INFO] - Eval loss at epoch 2: 0.5421
4
+ [2026-03-31 09:36:37,482][__main__][INFO] - Saving checkpoint
5
+ [2026-03-31 09:36:44,539][__main__][INFO] - Eval loss at epoch 3: 0.5184
6
+ [2026-03-31 09:37:42,581][__main__][INFO] - Eval loss at epoch 4: 0.5492
7
+ [2026-03-31 09:38:41,957][__main__][INFO] - Eval loss at epoch 5: 0.5689
8
+ [2026-03-31 09:39:32,846][__main__][INFO] - Saving checkpoint
9
+ [2026-03-31 09:39:43,004][__main__][INFO] - Eval loss at epoch 6: 0.5222
10
+ [2026-03-31 09:40:41,991][__main__][INFO] - Eval loss at epoch 7: 0.5251
11
+ [2026-03-31 09:41:41,652][__main__][INFO] - Eval loss at epoch 8: 0.5492
12
+ [2026-03-31 09:42:26,318][__main__][INFO] - Saving checkpoint
13
+ [2026-03-31 09:42:41,890][__main__][INFO] - Eval loss at epoch 9: 0.5331
14
+ [2026-03-31 09:43:41,666][__main__][INFO] - Eval loss at epoch 10: 0.5145
15
+ [2026-03-31 09:44:40,606][__main__][INFO] - Eval loss at epoch 11: 0.5239
16
+ [2026-03-31 09:45:20,529][__main__][INFO] - Saving checkpoint
17
+ [2026-03-31 09:45:39,905][__main__][INFO] - Eval loss at epoch 12: 0.5110
18
+ [2026-03-31 09:46:39,583][__main__][INFO] - Eval loss at epoch 13: 0.5125
19
+ [2026-03-31 09:47:39,332][__main__][INFO] - Eval loss at epoch 14: 0.5054
20
+ [2026-03-31 09:48:18,655][__main__][INFO] - Saving checkpoint
21
+ [2026-03-31 09:48:39,514][__main__][INFO] - Eval loss at epoch 15: 0.5371
22
+ [2026-03-31 09:49:37,754][__main__][INFO] - Eval loss at epoch 16: 0.4939
23
+ [2026-03-31 09:50:36,564][__main__][INFO] - Eval loss at epoch 17: 0.5318
24
+ [2026-03-31 09:51:12,058][__main__][INFO] - Saving checkpoint
25
+ [2026-03-31 09:51:35,973][__main__][INFO] - Eval loss at epoch 18: 0.5058
26
+ [2026-03-31 09:52:34,475][__main__][INFO] - Eval loss at epoch 19: 0.5134
27
+ [2026-03-31 09:53:34,121][__main__][INFO] - Eval loss at epoch 20: 0.5317
28
+ [2026-03-31 09:54:08,935][__main__][INFO] - Saving checkpoint
29
+ [2026-03-31 09:54:34,447][__main__][INFO] - Eval loss at epoch 21: 0.5369
30
+ [2026-03-31 09:55:33,298][__main__][INFO] - Eval loss at epoch 22: 0.5119
31
+ [2026-03-31 09:56:32,735][__main__][INFO] - Eval loss at epoch 23: 0.5454
32
+ [2026-03-31 09:57:03,544][__main__][INFO] - Saving checkpoint
33
+ [2026-03-31 09:57:33,559][__main__][INFO] - Eval loss at epoch 24: 0.5248
34
+ [2026-03-31 09:58:33,395][__main__][INFO] - Eval loss at epoch 25: 0.5219
35
+ [2026-03-31 09:59:32,889][__main__][INFO] - Eval loss at epoch 26: 0.5220
36
+ [2026-03-31 09:59:59,173][__main__][INFO] - Saving checkpoint
37
+ [2026-03-31 10:00:32,473][__main__][INFO] - Eval loss at epoch 27: 0.5058
38
+ [2026-03-31 10:01:31,127][__main__][INFO] - Eval loss at epoch 28: 0.5103
39
+ [2026-03-31 10:02:29,941][__main__][INFO] - Eval loss at epoch 29: 0.5056
40
+ [2026-03-31 10:02:54,663][__main__][INFO] - Saving checkpoint
41
+ [2026-03-31 10:03:30,831][__main__][INFO] - Eval loss at epoch 30: 0.5360
42
+ [2026-03-31 10:04:29,601][__main__][INFO] - Eval loss at epoch 31: 0.4924
43
+ [2026-03-31 10:05:29,342][__main__][INFO] - Eval loss at epoch 32: 0.4983
44
+ [2026-03-31 10:05:48,616][__main__][INFO] - Saving checkpoint
45
+ [2026-03-31 10:06:30,850][__main__][INFO] - Eval loss at epoch 33: 0.4768
46
+ [2026-03-31 10:07:30,413][__main__][INFO] - Eval loss at epoch 34: 0.4945
47
+ [2026-03-31 10:08:29,116][__main__][INFO] - Eval loss at epoch 35: 0.4879
48
+ [2026-03-31 10:08:46,756][__main__][INFO] - Saving checkpoint
49
+ [2026-03-31 10:09:29,204][__main__][INFO] - Eval loss at epoch 36: 0.5048
50
+ [2026-03-31 10:10:28,796][__main__][INFO] - Eval loss at epoch 37: 0.5370
51
+ [2026-03-31 10:11:28,681][__main__][INFO] - Eval loss at epoch 38: 0.4983
52
+ [2026-03-31 10:11:41,783][__main__][INFO] - Saving checkpoint
53
+ [2026-03-31 10:12:29,540][__main__][INFO] - Eval loss at epoch 39: 0.4753
54
+ [2026-03-31 10:13:29,204][__main__][INFO] - Eval loss at epoch 40: 0.5071
55
+ [2026-03-31 10:14:27,339][__main__][INFO] - Eval loss at epoch 41: 0.4829
56
+ [2026-03-31 10:14:38,158][__main__][INFO] - Saving checkpoint
57
+ [2026-03-31 10:15:27,468][__main__][INFO] - Eval loss at epoch 42: 0.4967
58
+ [2026-03-31 10:16:26,999][__main__][INFO] - Eval loss at epoch 43: 0.4897
59
+ [2026-03-31 10:17:25,940][__main__][INFO] - Eval loss at epoch 44: 0.4786
60
+ [2026-03-31 10:17:32,914][__main__][INFO] - Saving checkpoint
61
+ [2026-03-31 10:18:25,264][__main__][INFO] - Eval loss at epoch 45: 0.4875
62
+ [2026-03-31 10:19:23,252][__main__][INFO] - Eval loss at epoch 46: 0.4990
63
+ [2026-03-31 10:20:23,505][__main__][INFO] - Eval loss at epoch 47: 0.5173
64
+ [2026-03-31 10:20:26,632][__main__][INFO] - Saving checkpoint
65
+ [2026-03-31 10:21:21,067][__main__][INFO] - Eval loss at epoch 48: 0.4705
66
+ [2026-03-31 10:22:20,641][__main__][INFO] - Eval loss at epoch 49: 0.4964
67
+ [2026-03-31 10:23:16,196][__main__][INFO] - Saving checkpoint
68
+ [2026-03-31 10:23:20,179][__main__][INFO] - Eval loss at epoch 50: 0.4836
69
+ [2026-03-31 10:24:19,462][__main__][INFO] - Eval loss at epoch 51: 0.5069
70
+ [2026-03-31 10:25:18,946][__main__][INFO] - Eval loss at epoch 52: 0.4988
71
+ [2026-03-31 10:26:12,043][__main__][INFO] - Saving checkpoint
72
+ [2026-03-31 10:26:19,038][__main__][INFO] - Eval loss at epoch 53: 0.4600
73
+ [2026-03-31 10:27:17,638][__main__][INFO] - Eval loss at epoch 54: 0.4965
74
+ [2026-03-31 10:28:17,309][__main__][INFO] - Eval loss at epoch 55: 0.4579
75
+ [2026-03-31 10:29:05,862][__main__][INFO] - Saving checkpoint
76
+ [2026-03-31 10:29:16,698][__main__][INFO] - Eval loss at epoch 56: 0.5023
77
+ [2026-03-31 10:30:15,627][__main__][INFO] - Eval loss at epoch 57: 0.5086
78
+ [2026-03-31 10:31:13,689][__main__][INFO] - Eval loss at epoch 58: 0.5095
79
+ [2026-03-31 10:32:03,283][__main__][INFO] - Eval loss at epoch 59: 0.4990
80
+ [2026-03-31 10:32:03,283][__main__][INFO] - Saving final model
81
+ [2026-03-31 10:32:03,744][__main__][INFO] - Final model saved at /workspace/polyphony-seed-vc/runs/my_run-hydra-nfdsnflksdjflkds_2026-03-31_09-33-42/ft_model.pth
models/seed-vc-heb/languages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Hebrew