hunterhogan
/

hunterFormsBS

Model card Files Files and versions

xet

Community

hunterhogan committed 8 days ago

Commit

c4b1496

verified ·

1 Parent(s): 93927ec

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

bs_roformer_voc_hyperacev2.py +55 -0

bs_roformer_voc_hyperacev2.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import torch
+modelConfiguration = {   'audio': {   'chunk_size': 960000,  # Module-level settings dict with four sections: 'audio', 'inference', 'model', 'training'. chunk_size is presumably in samples (~21.8 s at 44100) — TODO confirm
+                 'dim_f': 1024,
+                 'dim_t': 801,  # NOTE(review): 'inference' below carries its own 'dim_t' (1876); confirm which one the consumer reads
+                 'hop_length': 441,  # NOTE(review): differs from model 'stft_hop_length' (512) — confirm which one is actually used
+                 'min_mean_abs': 0.0001,
+                 'n_fft': 2048,  # same value as model 'stft_n_fft'
+                 'num_channels': 2,
+                 'sample_rate': 44100},
+    'inference': {'batch_size': 2, 'dim_t': 1876, 'num_overlap': 4},
+    'model': {   'attn_dropout': 0.0,
+                 'depth': 12,
+                 'dim': 256,
+                 'dim_freqs_in': 1025,  # equals stft_n_fft // 2 + 1 (2048 // 2 + 1); presumably the one-sided STFT bin count — TODO confirm
+                 'dim_head': 64,
+                 'ff_dropout': 0.0,
+                 'flash_attn': True,
+                 'freq_transformer_depth': 1,
+                 'freqs_per_bands': (   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12,
+                                        12, 12, 12, 12, 12, 24, 24, 24, 24, 24, 24, 24, 24, 48, 48, 48, 48, 48, 48, 48, 48, 128, 129),  # 62 band widths (24x2, 12x4, 8x12, 8x24, 8x48, 128, 129) summing to 1025 == dim_freqs_in
+                 'heads': 8,
+                 'linear_transformer_depth': 0,
+                 'mask_estimator_depth': 1,
+                 'mask_filter_bank': torch.tensor(dtype=torch.bool, data=[[1,1]+[0]*1023,[0,0,1,1]+[0]*1021,[0]*4+[1,1]+[0]*1019,[0]*6+[1,1]+[0]*1017,[0]*8+[1,1]+[0]*1015,[0]*10+[1,1]+[0]*1013,[0]*12+[1,1]+[0]*1011,[0]*14+[1,1]+[0]*1009,[0]*16+[1,1]+[0]*1007,[0]*18+[1,1]+[0]*1005,[0]*20+[1,1]+[0]*1003,[0]*22+[1,1]+[0]*1001,[0]*24+[1,1]+[0]*999,[0]*26+[1,1]+[0]*997,[0]*28+[1,1]+[0]*995,[0]*30+[1,1]+[0]*993,[0]*32+[1,1]+[0]*991,[0]*34+[1,1]+[0]*989,[0]*36+[1,1]+[0]*987,[0]*38+[1,1]+[0]*985,[0]*40+[1,1]+[0]*983,[0]*42+[1,1]+[0]*981,[0]*44+[1,1]+[0]*979,[0]*46+[1,1]+[0]*977,[0]*48+[1]*4+[0]*973,[0]*52+[1]*4+[0]*969,[0]*56+[1]*4+[0]*965,[0]*60+[1]*4+[0]*961,[0]*64+[1]*4+[0]*957,[0]*68+[1]*4+[0]*953,[0]*72+[1]*4+[0]*949,[0]*76+[1]*4+[0]*945,[0]*80+[1]*4+[0]*941,[0]*84+[1]*4+[0]*937,[0]*88+[1]*4+[0]*933,[0]*92+[1]*4+[0]*929,[0]*96+[1]*12+[0]*917,[0]*108+[1]*12+[0]*905,[0]*120+[1]*12+[0]*893,[0]*132+[1]*12+[0]*881,[0]*144+[1]*12+[0]*869,[0]*156+[1]*12+[0]*857,[0]*168+[1]*12+[0]*845,[0]*180+[1]*12+[0]*833,[0]*192+[1]*24+[0]*809,[0]*216+[1]*24+[0]*785,[0]*240+[1]*24+[0]*761,[0]*264+[1]*24+[0]*737,[0]*288+[1]*24+[0]*713,[0]*312+[1]*24+[0]*689,[0]*336+[1]*24+[0]*665,[0]*360+[1]*24+[0]*641,[0]*384+[1]*48+[0]*593,[0]*432+[1]*48+[0]*545,[0]*480+[1]*48+[0]*497,[0]*528+[1]*48+[0]*449,[0]*576+[1]*48+[0]*401,[0]*624+[1]*48+[0]*353,[0]*672+[1]*48+[0]*305,[0]*720+[1]*48+[0]*257,[0]*768+[1]*128+[0]*129,[0]*896+[1]*129]),  # bool tensor from 62 rows x 1025 columns; row i holds 1s over one contiguous run of width freqs_per_bands[i], runs laid back-to-back over columns 0..1024
+                 'mlp_expansion_factor': 4,
+                 'multi_stft_hop_size': 147,
+                 'multi_stft_normalized': False,
+                 'multi_stft_resolution_loss_weight': 1.0,
+                 'multi_stft_resolutions_window_sizes': (4096, 2048, 1024, 512, 256),
+                 'num_stems': 1,  # NOTE(review): training 'instruments' lists two names while num_stems is 1 — presumably only 'target_instrument' is predicted; confirm
+                 'skip_connection': False,
+                 'stereo': True,
+                 'stft_hop_length': 512,
+                 'stft_n_fft': 2048,
+                 'stft_normalized': False,
+                 'stft_win_length': 1024,  # half of stft_n_fft (2048)
+                 'time_transformer_depth': 1,
+                 'use_torch_checkpoint': True},
+    'training': {   'batch_size': 1,
+                    'coarse_loss_clip': True,
+                    'ema_momentum': 0.999,
+                    'grad_clip': 0,  # NOTE(review): 0 presumably means "no clipping" — verify against the training loop
+                    'gradient_accumulation_steps': 1,
+                    'instruments': ['vocals', 'instrument'],
+                    'lr': 1e-05,
+                    'num_epochs': 1000,
+                    'num_steps': 1000,
+                    'optimizer': 'adam',
+                    'other_fix': False,
+                    'patience': 5,
+                    'q': 0.95,
+                    'reduce_factor': 0.9,
+                    'target_instrument': 'vocals',  # one of the names listed in 'instruments' above
+                    'use_amp': True}}