# NOTE(review): `keras` and `layers` are not imported explicitly in this file;
# they are presumably re-exported by the wildcard import from `Imports` --
# confirm, and prefer explicit imports once the source module is known.
from Imports import *
from Configuration import *


def ResBlock_v1(x, filters, kernel_size, dilation_rates=(1, 3, 5)):
    """Apply a HiFi-GAN style residual block to a sequence tensor.

    For every entry in ``dilation_rates`` the block runs one residual cycle:
    LeakyReLU -> dilated Conv1D -> LeakyReLU -> Conv1D (dilation 1), and then
    adds the cycle's input back onto its output.

    Args:
        x: Input Keras tensor. Its channel count must already equal
            ``filters``; otherwise the residual addition is not
            shape-compatible.
        filters: Number of output channels of every convolution.
        kernel_size: Kernel size shared by all convolutions in the block.
        dilation_rates: Dilation rate of the first convolution of each cycle;
            the number of entries sets the number of cycles.

    Returns:
        The tensor produced after the last residual cycle.
    """
    for d in dilation_rates:
        shortcut = x

        # Sub-cycle 1: dilated convolution widens the receptive field.
        x = layers.LeakyReLU(0.1)(x)
        x = layers.Conv1D(filters, kernel_size, dilation_rate=d, padding='same')(x)

        # Sub-cycle 2: plain (dilation 1) convolution.
        x = layers.LeakyReLU(0.1)(x)
        x = layers.Conv1D(filters, kernel_size, dilation_rate=1, padding='same')(x)

        # Residual connection around the two convolutions of this cycle.
        x = x + shortcut
    return x


def MRF_Module(x, filters, kernel_sizes=(3, 7, 11), dilation_rates=(1, 3, 5)):
    """Multi-Receptive Field Fusion: average several parallel ResBlocks.

    One ``ResBlock_v1`` is built per entry in ``kernel_sizes``, each fed with
    the same input ``x``; the branch outputs are summed and divided by the
    number of branches.

    Args:
        x: Input Keras tensor with ``filters`` channels.
        filters: Channel count used by every branch.
        kernel_sizes: Kernel size of each parallel branch. Defaults to the
            three branches (3, 7, 11).
        dilation_rates: Dilation rates forwarded to every branch.

    Returns:
        The element-wise mean of the branch outputs.

    Raises:
        ValueError: If ``kernel_sizes`` is empty.
    """
    kernel_sizes = tuple(kernel_sizes)
    if not kernel_sizes:
        raise ValueError("kernel_sizes must contain at least one kernel size")

    # Branches are built in the order given so layer creation order is stable.
    branches = [
        ResBlock_v1(x, filters, kernel_size=k, dilation_rates=dilation_rates)
        for k in kernel_sizes
    ]

    # Left-to-right sum, then divide by the actual number of branches so the
    # divisor can never drift from the branch count.
    fused = branches[0]
    for branch in branches[1:]:
        fused = fused + branch
    return fused / len(kernel_sizes)


def build_generator(
    input_shape=(None, 80),
    upsample_rates=(8, 8, 2, 2),
    upsample_kernels=(16, 16, 4, 4),
    channels=(256, 128, 64, 32),
    initial_channels=512,
):
    """Build the HiFi-GAN style generator as a Keras functional model.

    The network is: Conv1D (kernel 7) -> for each upsampling stage
    [LeakyReLU -> Conv1DTranspose -> MRF_Module] -> LeakyReLU ->
    Conv1D (1 channel, kernel 7, tanh).

    Args:
        input_shape: Shape of the mel input without the batch axis,
            i.e. ``(T_mel, n_mels)``.
        upsample_rates: Stride of the transposed convolution of each stage.
        upsample_kernels: Kernel size of the transposed convolution of each
            stage.
        channels: Output channel count of each stage.
        initial_channels: Channel count of the initial convolution.

    Returns:
        A ``keras.models.Model`` named ``"HiFiGAN_Generator_V1"`` mapping the
        mel input to a single-channel tensor in the tanh range.

    Raises:
        ValueError: If the three per-stage sequences differ in length.
    """
    if not (len(upsample_rates) == len(upsample_kernels) == len(channels)):
        raise ValueError(
            "upsample_rates, upsample_kernels and channels must have the same "
            "length"
        )

    mel_input = keras.layers.Input(shape=input_shape)  # [B, T_mel, 80]

    # Initial convolution
    x = layers.Conv1D(initial_channels, kernel_size=7, padding='same')(mel_input)

    # NOTE(review): with padding='same' each transposed convolution is
    # expected to scale the time axis by its stride (8*8*2*2 = 256 with the
    # defaults) -- confirm against the hop size used for the mel features.
    for rate, kernel, width in zip(upsample_rates, upsample_kernels, channels):
        x = layers.LeakyReLU(0.1)(x)

        # Upsample
        x = layers.Conv1DTranspose(
            width,
            kernel_size=kernel,
            strides=rate,
            padding='same'
        )(x)

        # Apply Multi-Receptive Field Fusion (parallel blocks)
        x = MRF_Module(x, width)

    # Final output layer
    x = layers.LeakyReLU(0.1)(x)
    x = layers.Conv1D(1, kernel_size=7, padding='same', activation='tanh')(x)

    return keras.models.Model(mel_input, x, name="HiFiGAN_Generator_V1")