hunterhogan
/

hunterFormsBS

Model card Files Files and versions

hunterFormsBS / bs_roformer_revive2.py

hunterhogan's picture

Upload folder using huggingface_hub

157b122 verified 25 days ago

history blame contribute delete

2.71 kB

	# Nested configuration mapping with four sections, in this order:
	# "audio", "inference", "model" and "training".
	# Key order inside every section is kept exactly as originally written.
	modelConfiguration = {
		"audio": {
			# 485100 == 441 * 1100, i.e. hop_length * (dim_t - 1).
			"chunk_size": 485100,
			"dim_f": 1024,
			"dim_t": 1101,
			"hop_length": 441,
			"min_mean_abs": 0.0,
			"n_fft": 2048,
			"num_channels": 2,
			"sample_rate": 44100,
		},
		"inference": {
			"batch_size": 4,
			# Same value as audio["dim_t"].
			"dim_t": 1101,
			"num_overlap": 2,
		},
		"model": {
			"attn_dropout": 0.0,
			"depth": 12,
			"dim": 512,
			# 1025 == 2048 // 2 + 1; the freqs_per_bands entries below sum to this value.
			"dim_freqs_in": 1025,
			"dim_head": 64,
			"ff_dropout": 0.0,
			"flash_attn": True,
			"freq_transformer_depth": 1,
			# 62 entries in total, grouped here by value; they add up to 1025.
			"freqs_per_bands": (
				# 24 entries of 2
				2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
				2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
				# 12 entries of 4
				4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				# 8 entries of 12
				12, 12, 12, 12, 12, 12, 12, 12,
				# 8 entries of 24
				24, 24, 24, 24, 24, 24, 24, 24,
				# 8 entries of 48
				48, 48, 48, 48, 48, 48, 48, 48,
				# final two entries
				128, 129,
			),
			"heads": 8,
			"linear_transformer_depth": 0,
			"mask_estimator_depth": 1,
			"multi_stft_hop_size": 147,
			"multi_stft_normalized": False,
			"multi_stft_resolution_loss_weight": 1.0,
			"multi_stft_resolutions_window_sizes": (4096, 2048, 1024, 512, 256),
			"num_stems": 1,
			"stereo": True,
			# Same values as audio["hop_length"] and audio["n_fft"].
			"stft_hop_length": 441,
			"stft_n_fft": 2048,
			"stft_normalized": False,
			# 882 == 2 * stft_hop_length.
			# NOTE(review): differs from stft_n_fft (2048) - confirm this is intended.
			"stft_win_length": 882,
			"time_transformer_depth": 1,
		},
		"training": {
			"augmentation": False,
			"augmentation_loudness": False,
			"augmentation_loudness_max": 0,
			"augmentation_loudness_min": 0,
			"augmentation_loudness_type": 1,
			"augmentation_mix": False,
			"augmentation_type": None,
			"batch_size": 1,
			"coarse_loss_clip": False,
			"ema_momentum": 0.999,
			"grad_clip": 0,
			"gradient_accumulation_steps": 1,
			"instruments": ["vocals", "other"],
			"lr": 1e-05,
			"num_epochs": 1000,
			"num_steps": 1000,
			"optimizer": "adam",
			"other_fix": True,
			"patience": 2,
			"q": 0.95,
			"reduce_factor": 0.95,
			# One of the names listed under "instruments".
			"target_instrument": "vocals",
			"use_amp": True,
			"use_mp3_compress": False,
		},
	}