import os
import tempfile
from io import BytesIO

import librosa
import numpy as np
import scipy.signal as signal
import soundfile as sf
import streamlit as st
from scipy.io import wavfile


def _shift_lpc_poles(a, factor, max_radius):
    """Return LPC coefficients whose complex poles have their angles scaled.

    A pole's angle sets the centre frequency of the resonance it creates and
    its radius sets the bandwidth, so multiplying the angles by ``factor``
    moves the resonances in frequency while leaving bandwidths alone.

    Args:
        a: 1-D LPC denominator coefficients with ``a[0]`` as leading term.
        factor: Multiplier applied to the angle of each complex pole.
        max_radius: Upper bound (< 1) applied to every pole radius so the
            resulting all-pole filter is stable.

    Returns:
        Real coefficient array of the same length as ``a``.
    """
    poles = np.roots(a)
    imag_part = np.imag(poles)

    # Work on one member of each conjugate pair and mirror it afterwards so
    # the rebuilt polynomial keeps real coefficients.
    upper = poles[imag_part > 0]
    on_axis = np.real(poles[imag_part == 0])
    if 2 * upper.size + on_axis.size != poles.size:
        # Conjugate symmetry could not be established; leave the envelope as is.
        return np.asarray(a, dtype=np.float64)

    radius = np.minimum(np.abs(upper), max_radius)
    angle = np.angle(upper)
    scaled = angle * factor
    # A pole that would cross DC or Nyquist keeps its original angle.
    in_band = (scaled > 0) & (scaled < np.pi)
    angle = np.where(in_band, scaled, angle)

    moved = radius * np.exp(1j * angle)
    on_axis = np.clip(on_axis, -max_radius, max_radius)
    new_poles = np.concatenate([moved, np.conj(moved), on_axis])
    return np.real(np.poly(new_poles))


def _shift_frame_formants(frame, factor, order, max_radius):
    """Re-synthesise one frame through its LPC envelope with shifted poles.

    The frame is inverse-filtered with its own LPC polynomial to obtain the
    excitation (residual), which is then passed through the all-pole filter
    built from the shifted poles.  Frames for which no usable LPC model can
    be estimated are returned unchanged.
    """
    frame = np.array(frame, dtype=np.float64)  # private, writable copy
    if not np.any(frame):
        return frame  # silence: nothing to model

    try:
        a = np.asarray(librosa.lpc(frame, order=order), dtype=np.float64)
    except FloatingPointError:
        # librosa documents this error for ill-conditioned input.
        return frame
    if not np.all(np.isfinite(a)):
        return frame

    shifted_a = _shift_lpc_poles(a, factor, max_radius)
    residual = signal.lfilter(a, [1.0], frame)
    return signal.lfilter([1.0], shifted_a, residual)


def modify_formants(y, sr, formant_shift_factor=1.2):
    """Shift the formants of a mono signal using frame-wise LPC modelling.

    The signal is cut into overlapping frames (2048 samples, hop 512).  For
    each frame an order-12 LPC model is estimated, the frame is reduced to
    its residual, and the residual is re-filtered through a copy of the
    model whose pole angles are multiplied by ``formant_shift_factor``.
    Frames are recombined by Hann-weighted overlap-add and the result is
    peak-normalised with ``librosa.util.normalize``.

    Compared with the previous implementation this:
      * no longer multiplies coefficient ``i`` by ``factor ** i``; that moves
        every pole outward by ``factor``, which for ``factor > 1`` yields an
        unstable filter with exponentially growing output;
      * removes the frame's own spectral envelope before applying the new
        one instead of stacking a second envelope on top of it;
      * cross-fades frames instead of concatenating hop-sized pieces that
        were filtered independently;
      * drops an STFT whose result was never used;
      * accepts input shorter than one frame and silent frames.

    Args:
        y: 1-D audio signal (assumed mono, as produced by ``librosa.load``
            with its defaults).
        sr: Sample rate.  Accepted for interface compatibility; not used.
        formant_shift_factor: Multiplier for the LPC pole angles.  Values
            above 1 move resonances up in frequency, below 1 down.

    Returns:
        The processed, normalised signal.  For input of at least one frame
        the length is ``(n_frames - 1) * 512 + 2048`` (trailing samples that
        do not fill a hop are dropped, as before); shorter input keeps its
        original length.  Empty input is returned as an empty copy.
    """
    frame_length = 2048
    hop_length = 512
    lpc_order = 12
    max_pole_radius = 0.999

    y = np.asarray(y)
    if y.size == 0:
        return y.copy()

    out_dtype = y.dtype if np.issubdtype(y.dtype, np.floating) else np.float64
    work = y.astype(np.float64)
    original_len = work.size

    # librosa.util.frame rejects input shorter than one frame; zero-pad so a
    # single frame can be processed and trim the padding again at the end.
    if original_len < frame_length:
        work = np.pad(work, (0, frame_length - original_len))

    frames = librosa.util.frame(
        work, frame_length=frame_length, hop_length=hop_length
    )
    n_frames = frames.shape[1]
    out_len = (n_frames - 1) * hop_length + frame_length

    window = signal.get_window("hann", frame_length)
    acc = np.zeros(out_len)
    weight = np.zeros(out_len)

    for index, frame in enumerate(frames.T):
        shaped = _shift_frame_formants(
            frame, formant_shift_factor, lpc_order, max_pole_radius
        )
        start = index * hop_length
        acc[start:start + frame_length] += window * shaped
        weight[start:start + frame_length] += window

    # Divide by the summed window so the overlap-add has unit gain, including
    # at the edges where fewer frames overlap.
    y_formant = acc / np.maximum(weight, np.finfo(np.float64).eps)
    y_formant = y_formant[:original_len].astype(out_dtype, copy=False)

    return librosa.util.normalize(y_formant)


def enhance_harmonics(y, sr):
    """Boost the harmonic component of ``y`` and peak-normalise the result.

    The signal is split with ``librosa.effects.hpss``; only the first
    (harmonic) output is used.  It is mixed back at a gain of 1.2 together
    with the unprocessed input at a gain of 0.3.

    Args:
        y: Audio signal passed to ``librosa.effects.hpss``.
        sr: Sample rate.  Accepted for interface compatibility; not used.

    Returns:
        The mixed signal after ``librosa.util.normalize``.
    """
    harmonic_gain = 1.2
    dry_gain = 0.3

    harmonic, _percussive = librosa.effects.hpss(y)
    blended = harmonic_gain * harmonic + dry_gain * y
    return librosa.util.normalize(blended)


def process_audio_advanced(audio_file, settings, smoothing_window=5,
                           smoothing_polyorder=2):
    """Load an audio file and run the voice-conversion chain on it.

    Steps, in order: pitch shift, formant modification, harmonic
    enhancement, time stretch, light Savitzky-Golay smoothing, and a final
    peak normalisation.

    The smoothing step previously used a fixed 1001-sample window.  A
    second-order Savitzky-Golay filter of that length is a low-pass whose
    pass band ends far below the speech range, so it removed nearly all of
    the converted voice; it also raised ``ValueError`` for signals shorter
    than 1001 samples.  The window is now a parameter with a short default
    that only rounds off sample-level roughness, and smoothing is skipped
    when the signal is too short for the chosen window.

    Args:
        audio_file: Anything accepted by ``librosa.load``.
        settings: Mapping that provides ``'pitch_shift'`` (semitones passed
            as ``n_steps``), ``'formant_shift'`` (passed to
            ``modify_formants``) and ``'vtln_factor'`` (passed as ``rate``
            to ``librosa.effects.time_stretch``).
        smoothing_window: Savitzky-Golay window length in samples; should be
            odd.  Smoothing is skipped when this is not larger than
            ``smoothing_polyorder`` or exceeds the signal length.
        smoothing_polyorder: Polynomial order of the Savitzky-Golay filter.

    Returns:
        Tuple ``(y_final, sr)`` with the processed signal and the sample
        rate reported by ``librosa.load``.
    """
    y, sr = librosa.load(audio_file)

    y_shifted = librosa.effects.pitch_shift(
        y,
        sr=sr,
        n_steps=settings['pitch_shift'],
    )

    y_formant = modify_formants(
        y_shifted,
        sr,
        settings['formant_shift'],
    )

    y_harmonic = enhance_harmonics(y_formant, sr)

    # Note: time_stretch changes the duration of the audio by this rate.
    y_vtln = librosa.effects.time_stretch(
        y_harmonic,
        rate=settings['vtln_factor'],
    )

    can_smooth = (
        smoothing_window > smoothing_polyorder
        and y_vtln.size >= smoothing_window
    )
    if can_smooth:
        y_smooth = signal.savgol_filter(
            y_vtln, smoothing_window, smoothing_polyorder
        )
    else:
        y_smooth = y_vtln

    y_final = librosa.util.normalize(y_smooth)

    return y_final, sr


def create_voice_preset(preset_name):
    """Look up the parameter set for a named voice preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A new dict with the keys ``'pitch_shift'``, ``'formant_shift'``,
        ``'vtln_factor'`` and ``'breathiness'``, or ``None`` when the name
        is not a known preset.
    """
    parameter_names = (
        'pitch_shift',
        'formant_shift',
        'vtln_factor',
        'breathiness',
    )
    # Values are listed in the same order as ``parameter_names``.
    preset_values = {
        'Young Female': (8.0, 1.3, 1.1, 0.3),
        'Mature Female': (6.0, 1.2, 1.05, 0.2),
        'Soft Female': (7.0, 1.25, 1.15, 0.4),
    }

    values = preset_values.get(preset_name)
    if values is None:
        return None
    return dict(zip(parameter_names, values))


def add_breathiness(y, sr, amount=0.3):
    """Mix filtered Gaussian noise into ``y`` and peak-normalise the result.

    Noise with mean 0 and standard deviation 0.01 is drawn from NumPy's
    global random state, passed through the one-pole recursive filter
    ``out[n] = in[n] + 0.98 * out[n - 1]``, and cross-faded with the input:
    the input is weighted by ``1 - amount`` and the noise by ``amount``.

    Args:
        y: 1-D audio signal; its length sets the amount of noise generated.
        sr: Sample rate.  Accepted for interface compatibility; not used.
        amount: Noise weight in the cross-fade.

    Returns:
        The mixed signal after ``librosa.util.normalize``.
    """
    n_samples = len(y)
    white_noise = np.random.normal(0, 0.01, n_samples)
    shaped_noise = signal.lfilter([1], [1, -0.98], white_noise)

    dry_weight = 1 - amount
    mixed = dry_weight * y + amount * shaped_noise
    return librosa.util.normalize(mixed)


# --- Streamlit page -------------------------------------------------------
# Streamlit re-executes this script from the top on every widget
# interaction, so nothing with a lasting side effect (such as writing a
# temporary file) should happen outside the "Convert Voice" branch.

st.title("Advanced Female Voice Converter")

uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
    )

    if preset_name == 'Custom':
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 12.0, 8.0, 0.5),
            'formant_shift': st.slider("Formant Shift", 1.0, 1.5, 1.2, 0.05),
            'vtln_factor': st.slider("Vocal Tract Length", 0.9, 1.2, 1.1, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 1.0, 0.3, 0.1)
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            tmp_path = None
            try:
                # Keep the upload's own extension so an MP3 is not stored
                # under a ".wav" name; fall back to ".wav" if it has none.
                suffix = os.path.splitext(uploaded_file.name)[1].lower() or '.wav'

                # delete=False lets the file be reopened by path after the
                # handle is closed; it is removed in the ``finally`` below.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=suffix
                ) as tmp_file:
                    tmp_path = tmp_file.name
                    tmp_file.write(uploaded_file.getvalue())

                processed_audio, sr = process_audio_advanced(tmp_path, settings)

                processed_audio = add_breathiness(
                    processed_audio,
                    sr,
                    settings['breathiness']
                )

                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                # Hand the widgets plain bytes so neither depends on the
                # buffer's current read position.
                wav_bytes = buffer.getvalue()

                st.audio(wav_bytes, format='audio/wav')

                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav"
                )

            except Exception as e:
                # Top-level UI boundary: report the failure to the user.
                st.error(f"Error processing audio: {e}")

            finally:
                # Previously the temporary copy was never removed, leaving
                # one file behind for every script rerun.
                if tmp_path is not None:
                    try:
                        os.unlink(tmp_path)
                    except OSError:
                        pass

st.markdown("""
### Voice Conversion Features:
- Pitch shifting with formant preservation
- Harmonic enhancement
- Vocal tract length modification
- Natural breathiness addition
- Multiple voice presets
- Custom parameter controls

### Tips for Best Results:
1. Start with a clear audio recording
2. Try different presets to find the best match
3. For custom settings:
   - Pitch shift: 6-8 for natural female voice
   - Formant shift: 1.1-1.3 for feminine resonance
   - Vocal tract length: 1.05-1.15 for realistic results
   - Breathiness: 0.2-0.4 for natural sound
""")