import os
import tempfile
from io import BytesIO

import librosa
import numpy as np
import scipy.signal as signal
import soundfile as sf
import streamlit as st
| |
|
def pitch_shift_with_formant_preservation(y, sr, n_steps):
    """Shift the pitch of ``y`` by ``n_steps`` semitones.

    NOTE(review): despite the name, no formant correction is performed here;
    the signal is only passed through ``librosa.effects.pitch_shift``.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y`` in Hz.
        n_steps: Size of the shift in steps of 1/12 octave
            (``bins_per_octave=12``).

    Returns:
        The pitch-shifted audio. When ``n_steps`` is 0 an unmodified copy of
        ``y`` is returned as a NumPy array.
    """
    # A zero shift would still run the whole pitch-shift pass, which costs
    # time and cannot improve the audio, so hand back the input untouched.
    if n_steps == 0:
        return np.array(y, copy=True)

    return librosa.effects.pitch_shift(
        y=y,
        sr=sr,
        n_steps=n_steps,
        bins_per_octave=12,
        res_type='kaiser_fast',
    )
| |
|
def enhance_female_characteristics(y, sr, settings):
    """Blend the harmonic component of ``y`` into the signal, then apply the EQ.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y`` in Hz.
        settings: Mapping with a ``'harmonic_boost'`` entry: the weight given
            to the harmonic component in the blend; the original signal gets
            ``1 - harmonic_boost``.

    Returns:
        The output of ``apply_female_eq`` for the blended signal.

    Raises:
        KeyError: If ``settings`` has no ``'harmonic_boost'`` entry.
    """
    boost = settings['harmonic_boost']

    if boost != 0:
        # Only the harmonic part is used; the percussive part is discarded.
        y_harmonic, _ = librosa.effects.hpss(y, margin=3.0, kernel_size=31)
        y_enhanced = y_harmonic * boost + y * (1 - boost)
    else:
        # With a zero weight the blend reduces to ``y`` itself, so the
        # harmonic/percussive separation would not affect the result.
        y_enhanced = y

    return apply_female_eq(y_enhanced, sr)
| |
|
def apply_female_eq(y, sr):
    """Band-pass ``y`` into two fixed bands, mix them and normalize.

    The result is the 1-2 kHz band plus 0.3 times the 3-5 kHz band, each
    obtained with a 2nd-order Butterworth design run through ``filtfilt``.

    NOTE(review): the unfiltered signal is not mixed back in, so everything
    outside these two bands is dropped rather than left flat - confirm that
    this is the intended "EQ" behaviour.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y`` in Hz. Band edges are expressed as
            fractions of the Nyquist frequency ``sr / 2``; presumably ``sr``
            must be above 10 kHz for the 5 kHz edge to be valid - verify.

    Returns:
        The mixed band signal after ``librosa.util.normalize``.
    """
    nyquist = sr / 2

    # Lower band: 1-2 kHz at full weight.
    b_low, a_low = signal.butter(2, [1000 / nyquist, 2000 / nyquist], btype='band')
    y_filtered = signal.filtfilt(b_low, a_low, y)

    # Upper band: 3-5 kHz, added at 0.3 of its filtered level.
    b_high, a_high = signal.butter(2, [3000 / nyquist, 5000 / nyquist], btype='band')
    y_filtered += 0.3 * signal.filtfilt(b_high, a_high, y)

    return librosa.util.normalize(y_filtered)
| |
|
def add_breathiness(y, sr, amount):
    """Mix low-pass filtered Gaussian noise into ``y``.

    Args:
        y: Audio samples; ``len(y)`` noise samples are generated.
        sr: Sampling rate of ``y`` in Hz.
        amount: Mixing weight of the noise; the signal gets ``1 - amount``.

    Returns:
        The mixed signal after ``librosa.util.normalize``.
    """
    # Zero-mean Gaussian noise (std 0.005) drawn from NumPy's global RNG, so
    # the output differs between calls unless the caller seeds that RNG.
    noise = np.random.normal(0, 0.005, len(y))

    # Keep only the part of the noise below 2 kHz.
    b, a = signal.butter(2, 2000 / (sr / 2), btype='lowpass')
    breath_noise = signal.filtfilt(b, a, noise)

    y_breathy = y * (1 - amount) + breath_noise * amount
    return librosa.util.normalize(y_breathy)
| |
|
def process_audio_advanced(audio_file, settings):
    """Load an audio file and run it through the voice-conversion chain.

    Steps, in order: load at 24 kHz, subtract the mean and normalize, pitch
    shift, harmonic enhancement with EQ, optional breathiness, final
    normalization.

    Args:
        audio_file: Audio source passed to ``librosa.load``.
        settings: Mapping with ``'pitch_shift'``, ``'harmonic_boost'`` and
            ``'breathiness'`` entries.

    Returns:
        A ``(samples, sample_rate)`` tuple with the processed audio.

    Raises:
        ValueError: If the file decodes to zero samples.
        KeyError: If a required entry is missing from ``settings``.
    """
    y, sr = librosa.load(audio_file, sr=24000)
    if y.size == 0:
        # Fail with a clear message instead of an obscure error from a
        # later DSP stage.
        raise ValueError("Audio file contains no samples")

    # Remove any DC offset before scaling.
    y = librosa.util.normalize(y - np.mean(y))

    y_shifted = pitch_shift_with_formant_preservation(
        y,
        sr,
        settings['pitch_shift'],
    )
    y_enhanced = enhance_female_characteristics(y_shifted, sr, settings)

    breathiness = settings['breathiness']
    if breathiness > 0:
        y_enhanced = add_breathiness(y_enhanced, sr, breathiness)

    # No sample-domain smoothing follows the final normalization. A long
    # Savitzky-Golay window (such as 1001 samples at polynomial order 2) acts
    # as a low-pass far below the speech band at 24 kHz, and it raises
    # ValueError for clips shorter than the window.
    y_final = librosa.util.normalize(y_enhanced)

    return y_final, sr
| |
|
def create_voice_preset(preset_name):
    """Return the settings dictionary for a named voice preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A new dict with ``'pitch_shift'``, ``'harmonic_boost'`` and
        ``'breathiness'`` entries, or ``None`` when ``preset_name`` is not a
        known preset (for example ``'Custom'``).
    """
    # Built on every call, so callers may mutate the returned dict freely.
    presets = {
        'Young Female': {
            'pitch_shift': 4.0,
            'harmonic_boost': 0.3,
            'breathiness': 0.15,
        },
        'Mature Female': {
            'pitch_shift': 3.0,
            'harmonic_boost': 0.2,
            'breathiness': 0.1,
        },
        'Soft Female': {
            'pitch_shift': 3.5,
            'harmonic_boost': 0.25,
            'breathiness': 0.2,
        },
    }
    return presets.get(preset_name)
| |
|
# --- Streamlit interface: upload, choose settings, convert, play/download ---
st.title("Improved Female Voice Converter")

uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom'],
    )

    if preset_name == 'Custom':
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 6.0, 4.0, 0.5),
            'harmonic_boost': st.slider("Harmonic Enhancement", 0.0, 0.5, 0.3, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 0.3, 0.15, 0.05),
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            # The upload is written to disk only when a conversion is
            # requested, and removed afterwards. Streamlit re-runs this script
            # on every widget interaction, so creating the file earlier would
            # leave one stray temp file behind per rerun.
            tmp_path = None
            try:
                # Keep the upload's own extension so an mp3 is not stored
                # under a misleading ".wav" name.
                suffix = os.path.splitext(uploaded_file.name)[1] or '.wav'
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_path = tmp_file.name

                processed_audio, sr = process_audio_advanced(tmp_path, settings)

                # Encode once as WAV and share the bytes between the player
                # and the download button.
                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                wav_bytes = buffer.getvalue()

                st.audio(wav_bytes, format='audio/wav')

                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav",
                )
            except Exception as e:
                # Top-level UI boundary: report the failure to the user
                # instead of crashing the app.
                st.error(f"Error processing audio: {str(e)}")
            finally:
                if tmp_path is not None and os.path.exists(tmp_path):
                    os.remove(tmp_path)
| |
|
# Static usage guidance, rendered as Markdown.
st.markdown("""
### Tips for Best Results:
1. Use high-quality input audio with clear speech
2. Start with presets and adjust if needed
3. Keep pitch shift between 3-5 for most natural results
4. Use minimal breathiness (0.1-0.2) for realistic sound
5. Record in a quiet environment with minimal background noise

### Recommended Settings:
- For younger female voice: pitch shift 4.0, harmonic boost 0.3
- For mature female voice: pitch shift 3.0, harmonic boost 0.2
- For soft female voice: pitch shift 3.5, harmonic boost 0.25
""")