Mahson / README.

Rename README.zip to README.

6cb9439 verified about 1 year ago

5.08 kB

	[!] Since URL was provided, we will try to download the model and use it (even if `rvc_model` is not set to 'CUSTOM').
	[~] Starting AI Cover Generation Pipeline...
	[~] Separating Vocals from Instrumental...
	0it [00:00, ?it/s]
	8%\|▊ \| 1/12 [00:00<00:07, 1.52it/s]
	17%\|█▋ \| 2/12 [00:01<00:06, 1.59it/s]
	25%\|██▌ \| 3/12 [00:01<00:03, 2.28it/s]
	33%\|███▎ \| 4/12 [00:01<00:03, 2.30it/s]
	50%\|█████ \| 6/12 [00:02<00:01, 3.15it/s]
	67%\|██████▋ \| 8/12 [00:02<00:01, 3.67it/s]
	83%\|████████▎ \| 10/12 [00:03<00:00, 4.02it/s]
	100%\|██████████\| 12/12 [00:03<00:00, 4.24it/s]
	100%\|██████████\| 12/12 [00:03<00:00, 3.14it/s]
	0it [00:00, ?it/s]
	8%\|▊ \| 1/12 [00:00<00:06, 1.64it/s]
	17%\|█▋ \| 2/12 [00:01<00:06, 1.58it/s]
	33%\|███▎ \| 4/12 [00:01<00:02, 2.70it/s]
	50%\|█████ \| 6/12 [00:02<00:01, 3.34it/s]
	67%\|██████▋ \| 8/12 [00:02<00:01, 3.77it/s]
	83%\|████████▎ \| 10/12 [00:02<00:00, 4.05it/s]
	100%\|██████████\| 12/12 [00:03<00:00, 4.26it/s]
	100%\|██████████\| 12/12 [00:03<00:00, 3.31it/s]
	[~] Separating Main Vocals from Backup Vocals...
	0it [00:00, ?it/s]
	8%\|▊ \| 1/12 [00:00<00:07, 1.56it/s]
	17%\|█▋ \| 2/12 [00:01<00:06, 1.61it/s]
	33%\|███▎ \| 4/12 [00:01<00:02, 3.64it/s]
	42%\|████▏ \| 5/12 [00:01<00:01, 3.60it/s]
	58%\|█████▊ \| 7/12 [00:01<00:01, 4.78it/s]
	75%\|███████▌ \| 9/12 [00:02<00:00, 5.59it/s]
	92%\|█████████▏\| 11/12 [00:02<00:00, 6.18it/s]
	100%\|██████████\| 12/12 [00:02<00:00, 4.39it/s]
	0it [00:00, ?it/s]
	8%\|▊ \| 1/12 [00:00<00:06, 1.67it/s]
	17%\|█▋ \| 2/12 [00:01<00:05, 1.67it/s]
	33%\|███▎ \| 4/12 [00:01<00:02, 3.28it/s]
	50%\|█████ \| 6/12 [00:01<00:01, 4.48it/s]
	67%\|██████▋ \| 8/12 [00:01<00:00, 5.33it/s]
	83%\|████████▎ \| 10/12 [00:02<00:00, 5.99it/s]
	100%\|██████████\| 12/12 [00:02<00:00, 6.47it/s]
	100%\|██████████\| 12/12 [00:02<00:00, 4.50it/s]
	[~] Applying DeReverb to Vocals...
	0it [00:00, ?it/s]
	17%\|█▋ \| 1/6 [00:00<00:03, 1.52it/s]
	33%\|███▎ \| 2/6 [00:01<00:02, 1.56it/s]
	50%\|█████ \| 3/6 [00:02<00:02, 1.28it/s]
	83%\|████████▎ \| 5/6 [00:03<00:00, 1.75it/s]
	100%\|██████████\| 6/6 [00:03<00:00, 1.54it/s]
	0it [00:00, ?it/s]
	17%\|█▋ \| 1/6 [00:00<00:03, 1.56it/s]
	33%\|███▎ \| 2/6 [00:01<00:02, 1.57it/s]
	67%\|██████▋ \| 4/6 [00:02<00:00, 2.02it/s]
	100%\|██████████\| 6/6 [00:02<00:00, 2.20it/s]
	100%\|██████████\| 6/6 [00:03<00:00, 1.80it/s]
	[~] Converting voice using RVC...
	2025-04-30 10:10:42 \| INFO \| fairseq.tasks.hubert_pretraining \| current directory is /src
	2025-04-30 10:10:42 \| INFO \| fairseq.tasks.hubert_pretraining \| HubertPretrainingTask Config {'_name': 'hubert_pretraining', 'data': 'metadata', 'fine_tuning': False, 'labels': ['km'], 'label_dir': 'label', 'label_rate': 50.0, 'sample_rate': 16000, 'normalize': False, 'enable_padding': False, 'max_keep_size': None, 'max_sample_size': 250000, 'min_sample_size': 32000, 'single_target': False, 'random_crop': True, 'pad_audio': False}
	2025-04-30 10:10:42 \| INFO \| fairseq.models.hubert.hubert \| HubertModel Config: {'_name': 'hubert', 'label_rate': 50.0, 'extractor_mode': default, 'encoder_layers': 12, 'encoder_embed_dim': 768, 'encoder_ffn_embed_dim': 3072, 'encoder_attention_heads': 12, 'activation_fn': gelu, 'layer_type': transformer, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'encoder_layerdrop': 0.05, 'dropout_input': 0.1, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': False, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 0.1, 'mask_length': 10, 'mask_prob': 0.8, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'checkpoint_activations': False, 'required_seq_len_multiple': 2, 'depthwise_conv_kernel_size': 31, 'attn_type': '', 'pos_enc_type': 'abs', 'fp16': False}
	gin_channels: 256 self.spk_embed_dim: 109
	<All keys matched successfully>
	[~] Applying audio effects to Vocals...
	[~] Combining AI Vocals and Instrumentals...
	[~] Removing intermediate audio files...
	[+] Cover generated at /src/song_output/865dfcbe0e9/tmpciufh8h5Mahsun mp3_23-04-25_15-50-36-788 (Squidward Ver).mp3

	[!] Since URL was provided, we will try to download the model and use it (even if `rvc_model` is not set to 'CUSTOM').
	[~] Starting AI Cover Generation Pipeline...
	[~] Separating Vocals from Instrumental...
	0it [00:00, ?it/s]
	8%\|▊ \| 1/12 [00:00<00:07, 1.52it/s]
	17%\|█▋ \| 2/12 [00:01<00:06, 1.59it/s]
	25%\|██▌ \| 3/12 [00:01<00:03, 2.28it/s]
	33%\|███▎ \| 4/12 [00:01<00:03, 2.30it/s]
	50%\|█████ \| 6/12 [00:02<00:01, 3.15it/s]
	67%\|██████▋ \| 8/12 [00:02<00:01, 3.67it/s]
	83%\|████████▎ \| 10/12 [00:03<00:00, 4.02it/s]
	100%\|██████████\| 12/12 [00:03<00:00, 4.24it/s]
	100%\|██████████\| 12/12 [00:03<00:00, 3.14it/s]
	0it [00:00, ?it/s]
	8%\|▊ \| 1/12 [00:00<00:06, 1.64it/s]
	17%\|█▋ \| 2/12 [00:01<00:06, 1.58it/s]
	33%\|███▎ \| 4/12 [00:01<00:02, 2.70it/s]
	50%\|█████ \| 6/12 [00:02<00:01, 3.34it/s]
	67%\|██████▋ \| 8/12 [00:02<00:01, 3.77it/s]
	83%\|████████▎ \| 10/12 [00:02<00:00, 4.05it/s]
	100%\|██████████\| 12/12 [00:03<00:00, 4.26it/s]
	100%\|██████████\| 12/12 [00:03<00:00, 3.31it/s]
	[~] Separating Main Vocals from Backup Vocals...
	0it [00:00, ?it/s]
	8%\|▊ \| 1/12 [00:00<00:07, 1.56it/s]
	17%\|█▋ \| 2/12 [00:01<00:06, 1.61it/s]
	33%\|███▎ \| 4/12 [00:01<00:02, 3.64it/s]
	42%\|████▏ \| 5/12 [00:01<00:01, 3.60it/s]
	58%\|█████▊ \| 7/12 [00:01<00:01, 4.78it/s]
	75%\|███████▌ \| 9/12 [00:02<00:00, 5.59it/s]
	92%\|█████████▏\| 11/12 [00:02<00:00, 6.18it/s]
	100%\|██████████\| 12/12 [00:02<00:00, 4.39it/s]
	0it [00:00, ?it/s]
	8%\|▊ \| 1/12 [00:00<00:06, 1.67it/s]
	17%\|█▋ \| 2/12 [00:01<00:05, 1.67it/s]
	33%\|███▎ \| 4/12 [00:01<00:02, 3.28it/s]
	50%\|█████ \| 6/12 [00:01<00:01, 4.48it/s]
	67%\|██████▋ \| 8/12 [00:01<00:00, 5.33it/s]
	83%\|████████▎ \| 10/12 [00:02<00:00, 5.99it/s]
	100%\|██████████\| 12/12 [00:02<00:00, 6.47it/s]
	100%\|██████████\| 12/12 [00:02<00:00, 4.50it/s]
	[~] Applying DeReverb to Vocals...
	0it [00:00, ?it/s]
	17%\|█▋ \| 1/6 [00:00<00:03, 1.52it/s]
	33%\|███▎ \| 2/6 [00:01<00:02, 1.56it/s]
	50%\|█████ \| 3/6 [00:02<00:02, 1.28it/s]
	83%\|████████▎ \| 5/6 [00:03<00:00, 1.75it/s]
	100%\|██████████\| 6/6 [00:03<00:00, 1.54it/s]
	0it [00:00, ?it/s]
	17%\|█▋ \| 1/6 [00:00<00:03, 1.56it/s]
	33%\|███▎ \| 2/6 [00:01<00:02, 1.57it/s]
	67%\|██████▋ \| 4/6 [00:02<00:00, 2.02it/s]
	100%\|██████████\| 6/6 [00:02<00:00, 2.20it/s]
	100%\|██████████\| 6/6 [00:03<00:00, 1.80it/s]
	[~] Converting voice using RVC...
	2025-04-30 10:10:42 \| INFO \| fairseq.tasks.hubert_pretraining \| current directory is /src
	2025-04-30 10:10:42 \| INFO \| fairseq.tasks.hubert_pretraining \| HubertPretrainingTask Config {'_name': 'hubert_pretraining', 'data': 'metadata', 'fine_tuning': False, 'labels': ['km'], 'label_dir': 'label', 'label_rate': 50.0, 'sample_rate': 16000, 'normalize': False, 'enable_padding': False, 'max_keep_size': None, 'max_sample_size': 250000, 'min_sample_size': 32000, 'single_target': False, 'random_crop': True, 'pad_audio': False}
	2025-04-30 10:10:42 \| INFO \| fairseq.models.hubert.hubert \| HubertModel Config: {'_name': 'hubert', 'label_rate': 50.0, 'extractor_mode': default, 'encoder_layers': 12, 'encoder_embed_dim': 768, 'encoder_ffn_embed_dim': 3072, 'encoder_attention_heads': 12, 'activation_fn': gelu, 'layer_type': transformer, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'encoder_layerdrop': 0.05, 'dropout_input': 0.1, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': False, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 0.1, 'mask_length': 10, 'mask_prob': 0.8, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'checkpoint_activations': False, 'required_seq_len_multiple': 2, 'depthwise_conv_kernel_size': 31, 'attn_type': '', 'pos_enc_type': 'abs', 'fp16': False}
	gin_channels: 256 self.spk_embed_dim: 109
	<All keys matched successfully>
	[~] Applying audio effects to Vocals...
	[~] Combining AI Vocals and Instrumentals...
	[~] Removing intermediate audio files...
	[+] Cover generated at /src/song_output/865dfcbe0e9/tmpciufh8h5Mahsun mp3_23-04-25_15-50-36-788 (Squidward Ver).mp3