import os
import tempfile
from io import BytesIO

import librosa
import numpy as np
import pyworld as world
import scipy.signal as signal
import soundfile as sf
import streamlit as st
import torch
import torchaudio
from scipy.io import wavfile
|
|
def enhance_harmonics(y, sr):
    """Emphasize the harmonic layer of a signal and normalize the mix.

    The harmonic component returned by ``librosa.effects.hpss`` is weighted
    by 1.2, summed with 0.3 of the unmodified input, and the sum is passed
    through ``librosa.util.normalize``.

    Args:
        y: Audio samples to process.
        sr: Sample rate. Unused by this function.

    Returns:
        The normalized blend of boosted harmonics and original signal.
    """
    # hpss yields (harmonic, percussive); only the harmonic part is mixed in.
    harmonic_part, _percussive_part = librosa.effects.hpss(y)
    blended = 1.2 * harmonic_part + 0.3 * y
    return librosa.util.normalize(blended)
|
|
def modify_formants(y, sr, formant_shift_factor=1.2):
    """Filter a signal through a rescaled all-pole (LPC) model of itself.

    A 12th-order linear-prediction polynomial is estimated from ``y``; its
    i-th coefficient is multiplied by ``formant_shift_factor ** i`` and the
    resulting polynomial is used as the denominator of an all-pole filter
    applied to ``y``.

    Args:
        y: Audio samples to process.
        sr: Sample rate. Unused by this function.
        formant_shift_factor: Base of the per-coefficient scaling.

    Returns:
        The filtered signal after ``librosa.util.normalize``.
    """
    order = 12
    # ``order`` is passed by keyword: recent librosa releases declare it
    # keyword-only, so the positional form raises TypeError there.
    a = librosa.lpc(y, order=order)

    # Scale coefficient i by factor**i. Index 0 is left untouched because
    # factor**0 == 1. The cast keeps the coefficient dtype that librosa
    # returned, as the original element-wise assignment did.
    # NOTE(review): this substitution multiplies every pole radius by the
    # factor, so values > 1 push poles outward and can make the filter
    # unstable -- confirm this is the intended "formant shift".
    scale = formant_shift_factor ** np.arange(len(a))
    new_a = (a * scale).astype(a.dtype)

    y_formant = signal.lfilter([1], new_a, y)
    return librosa.util.normalize(y_formant)
|
|
def _smooth_signal(y, window_length=1001, polyorder=2):
    """Savitzky-Golay smooth ``y``, shrinking the window to fit short inputs."""
    window = min(int(window_length), len(y))
    if window % 2 == 0:
        window -= 1  # keep the window odd so it is centred on a sample
    if window <= polyorder:
        # Too few samples to fit the polynomial; return the input unchanged.
        return y
    return signal.savgol_filter(y, window, polyorder)


def process_audio_advanced(audio_file, settings):
    """Run the full conversion chain on an audio file.

    Stages, in order: load with ``librosa.load`` (default sample rate and
    mono mixing), pitch shift, ``modify_formants``, ``enhance_harmonics``,
    time stretch, Savitzky-Golay smoothing, and a final normalize.

    Args:
        audio_file: Path (or other source accepted by ``librosa.load``).
        settings: Dict with keys ``'pitch_shift'`` (semitone steps),
            ``'formant_shift'`` and ``'vtln_factor'`` (time-stretch rate).
            An optional ``'smooth_window'`` overrides the smoothing window
            length (default 1001 samples).

    Returns:
        Tuple ``(samples, sample_rate)`` of the processed audio.

    Raises:
        ValueError: If the loaded audio contains no samples.
        KeyError: If a required settings key is missing.
    """
    y, sr = librosa.load(audio_file)
    if y.size == 0:
        raise ValueError("Audio file contains no samples")

    y_shifted = librosa.effects.pitch_shift(
        y,
        sr=sr,
        n_steps=settings['pitch_shift']
    )

    y_formant = modify_formants(
        y_shifted,
        sr,
        settings['formant_shift']
    )

    y_harmonic = enhance_harmonics(y_formant, sr)

    # The 'vtln_factor' setting is applied as a time-stretch rate.
    y_vtln = librosa.effects.time_stretch(
        y_harmonic,
        rate=settings['vtln_factor']
    )

    # The window is clamped so clips shorter than it no longer raise.
    # NOTE(review): a 1001-sample window is very wide for audio at librosa's
    # default load rate and behaves as a strong low-pass -- confirm the
    # default is intended, or pass a smaller 'smooth_window'.
    y_smooth = _smooth_signal(y_vtln, settings.get('smooth_window', 1001))

    y_final = librosa.util.normalize(y_smooth)

    return y_final, sr
|
|
def create_voice_preset(preset_name):
    """Look up the parameter set for a named voice preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A freshly built dict with the keys ``'pitch_shift'``,
        ``'formant_shift'``, ``'vtln_factor'`` and ``'breathiness'``, or
        ``None`` when ``preset_name`` is not a known preset.
    """
    field_names = ('pitch_shift', 'formant_shift', 'vtln_factor', 'breathiness')
    # One row per preset; values are listed in ``field_names`` order.
    preset_rows = {
        'Young Female': (8.0, 1.3, 1.1, 0.3),
        'Mature Female': (6.0, 1.2, 1.05, 0.2),
        'Soft Female': (7.0, 1.25, 1.15, 0.4),
    }

    row = preset_rows.get(preset_name)
    if row is None:
        return None
    return dict(zip(field_names, row))
|
|
def add_breathiness(y, sr, amount=0.3):
    """Cross-fade a signal with recursively filtered Gaussian noise.

    Args:
        y: Audio samples to process.
        sr: Sample rate. Unused by this function.
        amount: Mix weight of the noise; the input is weighted by
            ``1 - amount``.

    Returns:
        The mixed signal after ``librosa.util.normalize``.
    """
    # Zero-mean Gaussian noise (std 0.01), one value per input sample.
    white_noise = np.random.normal(0, 0.01, len(y))

    # One-pole recursive filter: out[n] = in[n] + 0.98 * out[n - 1].
    numerator = [1]
    denominator = [1, -0.98]
    shaped_noise = signal.lfilter(numerator, denominator, white_noise)

    dry_gain = 1 - amount
    mixed = dry_gain * y + amount * shaped_noise
    return librosa.util.normalize(mixed)
|
|
# --- Streamlit UI ------------------------------------------------------------
# Streamlit re-executes this script on every widget interaction, so the
# temporary copy of the upload is created only inside the "Convert Voice"
# handler and is always removed afterwards.
st.title("Advanced Female Voice Converter")

uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
    )

    if preset_name == 'Custom':
        # Manual controls: (label, min, max, default, step).
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 12.0, 8.0, 0.5),
            'formant_shift': st.slider("Formant Shift", 1.0, 1.5, 1.2, 0.05),
            'vtln_factor': st.slider("Vocal Tract Length", 0.9, 1.2, 1.1, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 1.0, 0.3, 0.1)
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            tmp_path = None
            try:
                # Keep the upload's own extension (falling back to .wav) in
                # case the decoder backend uses it as a format hint.
                suffix = os.path.splitext(uploaded_file.name)[1] or '.wav'

                # delete=False so the file can be reopened by path after it
                # is closed; it is removed explicitly in ``finally``.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=suffix
                ) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_path = tmp_file.name

                processed_audio, sr = process_audio_advanced(tmp_path, settings)

                processed_audio = add_breathiness(
                    processed_audio,
                    sr,
                    settings['breathiness']
                )

                # Encode once to WAV in memory and hand the raw bytes to both
                # widgets, so neither depends on the buffer's read position.
                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                wav_bytes = buffer.getvalue()

                st.audio(wav_bytes, format='audio/wav')

                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav"
                )

            except Exception as e:
                # UI boundary: surface any processing failure to the user
                # instead of crashing the app.
                st.error(f"Error processing audio: {str(e)}")
            finally:
                if tmp_path is not None:
                    try:
                        os.remove(tmp_path)
                    except OSError:
                        # Best-effort cleanup; a leftover temp file must not
                        # mask the conversion result or error.
                        pass
|
|
# Static help text rendered after the converter controls: a feature list
# followed by suggested ranges for the "Custom" preset sliders.
st.markdown("""
### Advanced Features:
- Formant preservation and shifting
- Harmonic enhancement
- Vocal tract length normalization
- Natural breathiness addition
- Multiple voice presets
- Custom parameter adjustment

### Tips for Best Results:
1. Use high-quality input audio
2. Start with presets and adjust if needed
3. For custom settings:
- Pitch shift: 6-8 for natural female voice
- Formant shift: 1.1-1.3 for feminine resonance
- Vocal tract length: 1.05-1.15 for realistic results
- Breathiness: 0.2-0.4 for natural sound
""")