hunterhogan
/

hunterFormsBS

Model card Files Files and versions

xet

Community

hunterhogan committed 7 days ago

Commit

6fcb60a

verified ·

1 Parent(s): 727e1b0

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

bs_roformer_voc_hyperacev2.py +1 -1
config_deux_becruily.py +48 -48

bs_roformer_voc_hyperacev2.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import torch
-modelConfiguration = {   'audio': {   'chunk_size': 960000,
 				'dim_f': 1024,
 				'dim_t': 801,
 				'hop_length': 441,

 import torch
+modelConfiguration = {   'audio': {   'chunk_size': 2 ** 20,
 				'dim_f': 1024,
 				'dim_t': 801,
 				'hop_length': 441,

config_deux_becruily.py CHANGED Viewed

@@ -1,48 +1,48 @@
-modelConfiguration = {   'audio': {   'chunk_size': 573300,
-                 'dim_f': 1024,
-                 'dim_t': 256,
-                 'hop_length': 441,
-                 'min_mean_abs': 0.0,
-                 'n_fft': 2048,
-                 'num_channels': 2,
-                 'sample_rate': 44100},
-    'inference': {'batch_size': 4, 'dim_t': 1101, 'num_overlap': 2},
-    'model': {   'attn_dropout': 0,
-                 'depth': 12,
-                 'dim': 256,
-                 'dim_freqs_in': 1025,
-                 'dim_head': 64,
-                 'ff_dropout': 0,
-                 'flash_attn': True,
-                 'freq_transformer_depth': 1,
-                 'heads': 8,
-                 'mask_estimator_depth': 2,
-                 'multi_stft_hop_size': 147,
-                 'multi_stft_normalized': False,
-                 'multi_stft_resolution_loss_weight': 1.0,
-                 'multi_stft_resolutions_window_sizes': (4096, 2048, 1024, 512, 256),
-                 'num_bands': 60,
-                 'num_stems': 2,
-                 'sample_rate': 44100,
-                 'stereo': True,
-                 'stft_hop_length': 441,
-                 'stft_n_fft': 2048,
-                 'stft_normalized': False,
-                 'stft_win_length': 882,
-                 'time_transformer_depth': 1},
-    'training': {   'batch_size': 1,
-                    'coarse_loss_clip': False,
-                    'ema_momentum': 0.999,
-                    'grad_clip': 0,
-                    'gradient_accumulation_steps': 1,
-                    'instruments': ['vocals', 'instrum'],
-                    'lr': 0.0001,
-                    'num_epochs': 1000,
-                    'num_steps': 1000,
-                    'optimizer': 'adamw',
-                    'other_fix': False,
-                    'patience': 2,
-                    'q': 0.95,
-                    'reduce_factor': 0.95,
-                    'target_instrument': None,
-                    'use_amp': True}}

+modelConfiguration = {   'audio': {   'chunk_size': 2 ** 20,
+				'dim_f': 1024,
+				'dim_t': 256,
+				'hop_length': 441,
+				'min_mean_abs': 0.0,
+				'n_fft': 2048,
+				'num_channels': 2,
+				'sample_rate': 44100},
+	'inference': {'batch_size': 4, 'dim_t': 1101, 'num_overlap': 2},
+	'model': {   'attn_dropout': 0,
+				'depth': 12,
+				'dim': 256,
+				'dim_freqs_in': 1025,
+				'dim_head': 64,
+				'ff_dropout': 0,
+				'flash_attn': True,
+				'freq_transformer_depth': 1,
+				'heads': 8,
+				'mask_estimator_depth': 2,
+				'multi_stft_hop_size': 147,
+				'multi_stft_normalized': False,
+				'multi_stft_resolution_loss_weight': 1.0,
+				'multi_stft_resolutions_window_sizes': (4096, 2048, 1024, 512, 256),
+				'num_bands': 60,
+				'num_stems': 2,
+				'sample_rate': 44100,
+				'stereo': True,
+				'stft_hop_length': 512,
+				'stft_n_fft': 2048,
+				'stft_normalized': False,
+				'stft_win_length': 1024,
+				'time_transformer_depth': 1},
+	'training': {   'batch_size': 1,
+					'coarse_loss_clip': False,
+					'ema_momentum': 0.999,
+					'grad_clip': 0,
+					'gradient_accumulation_steps': 1,
+					'instruments': ['vocals', 'instrum'],
+					'lr': 0.0001,
+					'num_epochs': 1000,
+					'num_steps': 1000,
+					'optimizer': 'adamw',
+					'other_fix': False,
+					'patience': 2,
+					'q': 0.95,
+					'reduce_factor': 0.95,
+					'target_instrument': None,
+					'use_amp': True}}